D’ici 2050, la population mondiale atteindra 9,1 milliards d’habitants, entraînant une augmentation de 70 % des besoins alimentaires. Cette situation, combinée à la réduction des terres agricoles disponibles en raison de l’urbanisation rapide, nécessite des innovations majeures dans la gestion agricole (Sharma et al., 2023). L’agriculture de précision, un domaine en plein essor, utilise les données et la technologie pour optimiser la production agricole, en intégrant la variabilité biophysique des sols dans le processus décisionnel.
Le travail proposé a été réalisé à partir d’un jeu de données collecté en Inde, un pays où 70 % de la population pratique l’agriculture, contribuant à environ 17 % du PIB national (Katarya et al., 2020). L’objectif de l’étude est d’utiliser le machine learning pour prédire les cultures les plus adaptées en fonction des caractéristiques du sol, optimisant ainsi l’usage des ressources agricoles.
# Load the crop recommendation data set (semicolon-separated);
# character columns (the crop label) are turned into factors.
dta <- read.table(
  file             = "Crop_recommendation.csv",
  header           = TRUE,
  sep              = ";",
  stringsAsFactors = TRUE
)
# Overview of the distribution of every variable
summary(dta)
## N P K temperature
## Min. : 0.00 Min. : 5.00 Min. : 5.00 Min. : 8.826
## 1st Qu.: 21.00 1st Qu.: 28.00 1st Qu.: 20.00 1st Qu.:22.769
## Median : 37.00 Median : 51.00 Median : 32.00 Median :25.599
## Mean : 50.55 Mean : 53.36 Mean : 48.15 Mean :25.616
## 3rd Qu.: 84.25 3rd Qu.: 68.00 3rd Qu.: 49.00 3rd Qu.:28.562
## Max. :140.00 Max. :145.00 Max. :205.00 Max. :43.675
##
## humidity ph rainfall label
## Min. :14.26 Min. :3.505 Min. : 20.21 apple : 100
## 1st Qu.:60.26 1st Qu.:5.972 1st Qu.: 64.55 banana : 100
## Median :80.47 Median :6.425 Median : 94.87 blackgram: 100
## Mean :71.48 Mean :6.469 Mean :103.46 chickpea : 100
## 3rd Qu.:89.95 3rd Qu.:6.924 3rd Qu.:124.27 coconut : 100
## Max. :99.98 Max. :9.935 Max. :298.56 coffee : 100
## (Other) :1600
# library() fails loudly if the package is missing, whereas require()
# only returns FALSE — prefer library() for unconditional loading.
library(FactoMineR)
## Loading required package: FactoMineR
# PCA on the standardized soil variables; column 8 (the crop label)
# is declared as a supplementary qualitative variable.
resPCA2 <- PCA(dta, scale.unit = TRUE, quali.sup = 8)
# Individuals colored by crop type; only the category labels are drawn
plot(resPCA2, habillage = 8, label = "quali")
Le jeu de données comprend sept variables physico-chimiques des sols et une variable décrivant le type de culture adaptée. On a ici 100 observations par culture, le jeu de données est donc équilibré.
# library() fails loudly if the package is missing — prefer it to require()
library(class)
## Loading required package: class
# library() fails loudly if the package is missing — prefer it to require()
library(caret)
## Loading required package: caret
## Loading required package: ggplot2
## Loading required package: lattice
set.seed(123)  # reproducible partition
# Stratified 75 % / 25 % train-test split on the crop label
trainIndex <- createDataPartition(dta$label, p = 0.75, list = FALSE)
data.train <- dta[trainIndex, ]
data.test  <- dta[-trainIndex, ]
# 10-fold cross-validation scheme, reused by the models below
ctrl <- trainControl(method = "cv", number = 10)
# Fit a multinomial logistic regression with cross-validation
reg_model <- train(label ~ ., data = data.train, method = "multinom",
                   trControl = ctrl)
## # weights: 198 (168 variable)
## initial value 4590.198043
## iter 10 value 2270.906904
## iter 20 value 1953.794715
## iter 30 value 1820.805812
## iter 40 value 1642.752958
## iter 50 value 1580.922609
## iter 60 value 1535.122220
## iter 70 value 1457.049555
## iter 80 value 1402.949969
## iter 90 value 1371.482559
## iter 100 value 1351.064193
## final value 1351.064193
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4590.198043
## iter 10 value 2270.914686
## iter 20 value 1953.827061
## iter 30 value 1820.706664
## iter 40 value 1642.331449
## iter 50 value 1575.512108
## iter 60 value 1525.258994
## iter 70 value 1452.408294
## iter 80 value 1398.261498
## iter 90 value 1371.858115
## iter 100 value 1348.666434
## final value 1348.666434
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4590.198043
## iter 10 value 2270.906912
## iter 20 value 1953.794747
## iter 30 value 1820.805713
## iter 40 value 1642.752526
## iter 50 value 1580.924790
## iter 60 value 1535.138146
## iter 70 value 1457.211144
## iter 80 value 1403.148738
## iter 90 value 1372.338701
## iter 100 value 1352.299540
## final value 1352.299540
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4584.015958
## iter 10 value 2451.923152
## iter 20 value 1901.408797
## iter 30 value 1682.658508
## iter 40 value 1545.309598
## iter 50 value 1468.742953
## iter 60 value 1380.831784
## iter 70 value 1306.263950
## iter 80 value 1254.930828
## iter 90 value 1224.393465
## iter 100 value 1178.543460
## final value 1178.543460
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4584.015958
## iter 10 value 2451.938280
## iter 20 value 1901.440470
## iter 30 value 1682.713508
## iter 40 value 1545.376256
## iter 50 value 1468.893165
## iter 60 value 1382.093732
## iter 70 value 1307.462570
## iter 80 value 1259.734495
## iter 90 value 1233.190307
## iter 100 value 1185.623443
## final value 1185.623443
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4584.015958
## iter 10 value 2451.923167
## iter 20 value 1901.408829
## iter 30 value 1682.658563
## iter 40 value 1545.309665
## iter 50 value 1468.743103
## iter 60 value 1380.833026
## iter 70 value 1306.265121
## iter 80 value 1254.929160
## iter 90 value 1224.393638
## iter 100 value 1178.551265
## final value 1178.551265
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4590.198043
## iter 10 value 2479.465970
## iter 20 value 2058.087696
## iter 30 value 1807.177724
## iter 40 value 1665.284487
## iter 50 value 1570.802820
## iter 60 value 1474.899858
## iter 70 value 1381.878132
## iter 80 value 1322.098919
## iter 90 value 1292.164662
## iter 100 value 1240.473578
## final value 1240.473578
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4590.198043
## iter 10 value 2479.480662
## iter 20 value 2058.134167
## iter 30 value 1807.263641
## iter 40 value 1665.196982
## iter 50 value 1569.640186
## iter 60 value 1473.712234
## iter 70 value 1377.375261
## iter 80 value 1316.607404
## iter 90 value 1284.865393
## iter 100 value 1213.228750
## final value 1213.228750
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4590.198043
## iter 10 value 2479.465984
## iter 20 value 2058.087743
## iter 30 value 1807.177810
## iter 40 value 1665.284397
## iter 50 value 1570.801595
## iter 60 value 1474.896731
## iter 70 value 1381.870087
## iter 80 value 1322.098207
## iter 90 value 1292.107978
## iter 100 value 1236.201027
## final value 1236.201027
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4587.107001
## iter 10 value 2435.825563
## iter 20 value 1882.663098
## iter 30 value 1696.986710
## iter 40 value 1571.637986
## iter 50 value 1484.292455
## iter 60 value 1410.235216
## iter 70 value 1322.540916
## iter 80 value 1269.123233
## iter 90 value 1236.385813
## iter 100 value 1185.638272
## final value 1185.638272
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4587.107001
## iter 10 value 2435.841299
## iter 20 value 1882.696862
## iter 30 value 1697.046950
## iter 40 value 1571.749801
## iter 50 value 1484.437141
## iter 60 value 1411.053473
## iter 70 value 1322.455726
## iter 80 value 1282.318455
## iter 90 value 1246.912614
## iter 100 value 1195.901376
## final value 1195.901376
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4587.107001
## iter 10 value 2435.825578
## iter 20 value 1882.663132
## iter 30 value 1696.986770
## iter 40 value 1571.638098
## iter 50 value 1484.292600
## iter 60 value 1410.235911
## iter 70 value 1322.539522
## iter 80 value 1269.116059
## iter 90 value 1236.519586
## iter 100 value 1188.778807
## final value 1188.778807
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4584.015958
## iter 10 value 2468.253627
## iter 20 value 1938.084108
## iter 30 value 1708.999802
## iter 40 value 1552.363703
## iter 50 value 1477.509990
## iter 60 value 1401.844356
## iter 70 value 1339.582587
## iter 80 value 1291.773745
## iter 90 value 1263.268583
## iter 100 value 1217.436031
## final value 1217.436031
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4584.015958
## iter 10 value 2468.269216
## iter 20 value 1938.123817
## iter 30 value 1709.057008
## iter 40 value 1552.473137
## iter 50 value 1477.543871
## iter 60 value 1403.220533
## iter 70 value 1340.929995
## iter 80 value 1292.619397
## iter 90 value 1259.555547
## iter 100 value 1210.759726
## final value 1210.759726
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4584.015958
## iter 10 value 2468.253643
## iter 20 value 1938.084148
## iter 30 value 1708.999859
## iter 40 value 1552.363812
## iter 50 value 1477.510016
## iter 60 value 1401.845916
## iter 70 value 1339.584938
## iter 80 value 1291.777969
## iter 90 value 1263.263316
## iter 100 value 1213.261113
## final value 1213.261113
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4602.562213
## iter 10 value 2494.534953
## iter 20 value 1957.894674
## iter 30 value 1719.333879
## iter 40 value 1560.539258
## iter 50 value 1484.754570
## iter 60 value 1378.722639
## iter 70 value 1284.497882
## iter 80 value 1230.433016
## iter 90 value 1192.054828
## iter 100 value 1136.348359
## final value 1136.348359
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4602.562213
## iter 10 value 2494.550428
## iter 20 value 1957.917417
## iter 30 value 1719.388971
## iter 40 value 1560.656001
## iter 50 value 1484.919972
## iter 60 value 1385.467631
## iter 70 value 1290.315448
## iter 80 value 1243.245485
## iter 90 value 1202.908772
## iter 100 value 1145.634866
## final value 1145.634866
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4602.562213
## iter 10 value 2494.534969
## iter 20 value 1957.894697
## iter 30 value 1719.333934
## iter 40 value 1560.539374
## iter 50 value 1484.754733
## iter 60 value 1378.722961
## iter 70 value 1284.498562
## iter 80 value 1230.437961
## iter 90 value 1192.080294
## iter 100 value 1135.471591
## final value 1135.471591
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4577.833873
## iter 10 value 2480.039508
## iter 20 value 1898.889491
## iter 30 value 1723.410545
## iter 40 value 1594.211499
## iter 50 value 1512.817075
## iter 60 value 1426.280138
## iter 70 value 1339.807006
## iter 80 value 1281.479811
## iter 90 value 1242.853784
## iter 100 value 1201.528818
## final value 1201.528818
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4577.833873
## iter 10 value 2480.055256
## iter 20 value 1898.922463
## iter 30 value 1723.470395
## iter 40 value 1594.309875
## iter 50 value 1512.765231
## iter 60 value 1426.169717
## iter 70 value 1341.465065
## iter 80 value 1282.812822
## iter 90 value 1245.979016
## iter 100 value 1198.779445
## final value 1198.779445
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4577.833873
## iter 10 value 2480.039524
## iter 20 value 1898.889524
## iter 30 value 1723.410605
## iter 40 value 1594.211598
## iter 50 value 1512.817001
## iter 60 value 1426.280017
## iter 70 value 1339.812049
## iter 80 value 1281.450942
## iter 90 value 1242.852218
## iter 100 value 1201.535866
## final value 1201.535866
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4596.380128
## iter 10 value 2482.108649
## iter 20 value 1967.722360
## iter 30 value 1744.703018
## iter 40 value 1625.431692
## iter 50 value 1542.137085
## iter 60 value 1447.201736
## iter 70 value 1363.915796
## iter 80 value 1307.003484
## iter 90 value 1280.542573
## iter 100 value 1229.253361
## final value 1229.253361
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4596.380128
## iter 10 value 2482.123607
## iter 20 value 1967.740817
## iter 30 value 1744.814059
## iter 40 value 1629.131969
## iter 50 value 1552.717520
## iter 60 value 1438.294755
## iter 70 value 1359.088403
## iter 80 value 1308.737287
## iter 90 value 1268.798300
## iter 100 value 1222.364364
## final value 1222.364364
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4596.380128
## iter 10 value 2482.108664
## iter 20 value 1967.722378
## iter 30 value 1744.703128
## iter 40 value 1625.430491
## iter 50 value 1542.132829
## iter 60 value 1447.144198
## iter 70 value 1363.783629
## iter 80 value 1306.814413
## iter 90 value 1280.464538
## iter 100 value 1225.259780
## final value 1225.259780
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4602.562213
## iter 10 value 2502.274383
## iter 20 value 2027.016585
## iter 30 value 1795.270378
## iter 40 value 1635.974594
## iter 50 value 1560.606367
## iter 60 value 1441.188317
## iter 70 value 1350.830720
## iter 80 value 1300.792689
## iter 90 value 1262.100059
## iter 100 value 1210.704375
## final value 1210.704375
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4602.562213
## iter 10 value 2502.290021
## iter 20 value 2027.019361
## iter 30 value 1795.292894
## iter 40 value 1635.986807
## iter 50 value 1561.170198
## iter 60 value 1442.436006
## iter 70 value 1351.275398
## iter 80 value 1306.043359
## iter 90 value 1266.483361
## iter 100 value 1211.202569
## final value 1211.202569
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4602.562213
## iter 10 value 2502.274398
## iter 20 value 2027.016588
## iter 30 value 1795.270400
## iter 40 value 1635.974605
## iter 50 value 1560.606919
## iter 60 value 1441.189527
## iter 70 value 1350.832254
## iter 80 value 1300.791836
## iter 90 value 1262.225792
## iter 100 value 1210.709120
## final value 1210.709120
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4587.107001
## iter 10 value 2450.708092
## iter 20 value 1883.710510
## iter 30 value 1715.913923
## iter 40 value 1565.452989
## iter 50 value 1492.237579
## iter 60 value 1425.624913
## iter 70 value 1325.002189
## iter 80 value 1279.513771
## iter 90 value 1236.142360
## iter 100 value 1191.259249
## final value 1191.259249
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4587.107001
## iter 10 value 2450.723923
## iter 20 value 1883.740144
## iter 30 value 1715.956843
## iter 40 value 1565.541292
## iter 50 value 1492.135464
## iter 60 value 1428.642125
## iter 70 value 1328.884012
## iter 80 value 1281.612168
## iter 90 value 1239.279069
## iter 100 value 1172.400007
## final value 1172.400007
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 4587.107001
## iter 10 value 2450.708108
## iter 20 value 1883.710540
## iter 30 value 1715.913966
## iter 40 value 1565.453077
## iter 50 value 1492.237466
## iter 60 value 1425.626902
## iter 70 value 1325.005225
## iter 80 value 1279.512724
## iter 90 value 1236.139009
## iter 100 value 1191.290077
## final value 1191.290077
## stopped after 100 iterations
## # weights: 198 (168 variable)
## initial value 5100.220048
## iter 10 value 2242.817616
## iter 20 value 1896.131636
## iter 30 value 1771.316158
## iter 40 value 1678.332480
## iter 50 value 1625.018087
## iter 60 value 1545.456283
## iter 70 value 1464.938787
## iter 80 value 1450.721861
## iter 90 value 1431.161754
## iter 100 value 1405.002678
## final value 1405.002678
## stopped after 100 iterations
# Predict crop labels on the held-out test set
pred_reg <- predict(reg_model, newdata = data.test)
# Confusion matrix and derived statistics: compute once and print the
# stored object (the original recomputed confusionMatrix() a second time
# just to display it).
conf_matrix <- confusionMatrix(pred_reg, data.test$label)
print(conf_matrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction apple banana blackgram chickpea coconut coffee cotton grapes jute
## apple 25 0 0 0 0 0 0 0 0
## banana 0 25 0 0 0 0 2 1 0
## blackgram 0 0 18 0 0 0 0 0 0
## chickpea 0 0 0 25 0 0 0 0 0
## coconut 0 0 0 0 22 0 0 0 2
## coffee 0 0 0 0 0 24 0 0 1
## cotton 0 0 0 0 0 0 23 0 0
## grapes 0 0 0 0 0 0 0 24 0
## jute 0 0 0 0 0 0 0 0 12
## kidneybeans 0 0 0 0 0 0 0 0 0
## lentil 0 0 3 0 0 0 0 0 0
## maize 0 0 0 0 0 0 0 0 6
## mango 0 0 0 0 0 0 0 0 0
## mothbeans 0 0 4 0 0 0 0 0 0
## mungbean 0 0 0 0 0 0 0 0 0
## muskmelon 0 0 0 0 0 0 0 0 0
## orange 0 0 0 0 1 0 0 0 0
## papaya 0 0 0 0 0 0 0 0 2
## pigeonpeas 0 0 0 0 0 0 0 0 0
## pomegranate 0 0 0 0 2 0 0 0 0
## rice 0 0 0 0 0 0 0 0 2
## watermelon 0 0 0 0 0 1 0 0 0
## Reference
## Prediction kidneybeans lentil maize mango mothbeans mungbean muskmelon
## apple 0 0 0 0 0 0 0
## banana 0 0 3 0 4 0 0
## blackgram 0 5 0 0 2 0 0
## chickpea 0 0 0 0 0 0 0
## coconut 0 0 0 0 0 0 0
## coffee 0 0 0 0 0 0 0
## cotton 0 0 3 0 0 0 0
## grapes 0 0 0 0 0 0 0
## jute 0 0 0 0 0 0 0
## kidneybeans 25 0 0 0 0 0 0
## lentil 0 19 0 0 0 0 0
## maize 0 0 18 0 0 0 0
## mango 0 0 0 24 0 0 0
## mothbeans 0 0 1 1 17 0 0
## mungbean 0 1 0 0 0 25 0
## muskmelon 0 0 0 0 0 0 25
## orange 0 0 0 0 0 0 0
## papaya 0 0 0 0 1 0 0
## pigeonpeas 0 0 0 0 1 0 0
## pomegranate 0 0 0 0 0 0 0
## rice 0 0 0 0 0 0 0
## watermelon 0 0 0 0 0 0 0
## Reference
## Prediction orange papaya pigeonpeas pomegranate rice watermelon
## apple 0 0 0 0 0 0
## banana 0 0 0 0 0 0
## blackgram 0 0 1 0 0 0
## chickpea 0 0 0 0 0 0
## coconut 1 0 0 2 1 0
## coffee 0 0 0 0 0 0
## cotton 0 0 0 0 0 0
## grapes 0 0 0 0 0 0
## jute 0 0 0 0 2 0
## kidneybeans 0 0 6 0 0 0
## lentil 0 0 0 0 0 0
## maize 0 0 0 0 0 0
## mango 0 0 1 0 0 0
## mothbeans 0 0 0 0 0 0
## mungbean 0 3 0 0 0 0
## muskmelon 0 1 0 0 0 8
## orange 23 0 0 0 0 0
## papaya 1 17 0 0 1 0
## pigeonpeas 0 0 17 0 0 0
## pomegranate 0 0 0 23 0 0
## rice 0 4 0 0 21 0
## watermelon 0 0 0 0 0 17
##
## Overall Statistics
##
## Accuracy : 0.8527
## 95% CI : (0.8203, 0.8813)
## No Information Rate : 0.0455
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.8457
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: apple Class: banana Class: blackgram
## Sensitivity 1.00000 1.00000 0.72000
## Specificity 1.00000 0.98095 0.98476
## Pos Pred Value 1.00000 0.71429 0.69231
## Neg Pred Value 1.00000 1.00000 0.98664
## Prevalence 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.04545 0.03273
## Detection Prevalence 0.04545 0.06364 0.04727
## Balanced Accuracy 1.00000 0.99048 0.85238
## Class: chickpea Class: coconut Class: coffee Class: cotton
## Sensitivity 1.00000 0.88000 0.96000 0.92000
## Specificity 1.00000 0.98857 0.99810 0.99429
## Pos Pred Value 1.00000 0.78571 0.96000 0.88462
## Neg Pred Value 1.00000 0.99425 0.99810 0.99618
## Prevalence 0.04545 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.04000 0.04364 0.04182
## Detection Prevalence 0.04545 0.05091 0.04545 0.04727
## Balanced Accuracy 1.00000 0.93429 0.97905 0.95714
## Class: grapes Class: jute Class: kidneybeans Class: lentil
## Sensitivity 0.96000 0.48000 1.00000 0.76000
## Specificity 1.00000 0.99619 0.98857 0.99429
## Pos Pred Value 1.00000 0.85714 0.80645 0.86364
## Neg Pred Value 0.99810 0.97575 1.00000 0.98864
## Prevalence 0.04545 0.04545 0.04545 0.04545
## Detection Rate 0.04364 0.02182 0.04545 0.03455
## Detection Prevalence 0.04364 0.02545 0.05636 0.04000
## Balanced Accuracy 0.98000 0.73810 0.99429 0.87714
## Class: maize Class: mango Class: mothbeans Class: mungbean
## Sensitivity 0.72000 0.96000 0.68000 1.00000
## Specificity 0.98857 0.99810 0.98857 0.99238
## Pos Pred Value 0.75000 0.96000 0.73913 0.86207
## Neg Pred Value 0.98669 0.99810 0.98482 1.00000
## Prevalence 0.04545 0.04545 0.04545 0.04545
## Detection Rate 0.03273 0.04364 0.03091 0.04545
## Detection Prevalence 0.04364 0.04545 0.04182 0.05273
## Balanced Accuracy 0.85429 0.97905 0.83429 0.99619
## Class: muskmelon Class: orange Class: papaya
## Sensitivity 1.00000 0.92000 0.68000
## Specificity 0.98286 0.99810 0.99048
## Pos Pred Value 0.73529 0.95833 0.77273
## Neg Pred Value 1.00000 0.99620 0.98485
## Prevalence 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.04182 0.03091
## Detection Prevalence 0.06182 0.04364 0.04000
## Balanced Accuracy 0.99143 0.95905 0.83524
## Class: pigeonpeas Class: pomegranate Class: rice
## Sensitivity 0.68000 0.92000 0.84000
## Specificity 0.99810 0.99619 0.98857
## Pos Pred Value 0.94444 0.92000 0.77778
## Neg Pred Value 0.98496 0.99619 0.99235
## Prevalence 0.04545 0.04545 0.04545
## Detection Rate 0.03091 0.04182 0.03818
## Detection Prevalence 0.03273 0.04545 0.04909
## Balanced Accuracy 0.83905 0.95810 0.91429
## Class: watermelon
## Sensitivity 0.68000
## Specificity 0.99810
## Pos Pred Value 0.94444
## Neg Pred Value 0.98496
## Prevalence 0.04545
## Detection Rate 0.03091
## Detection Prevalence 0.03273
## Balanced Accuracy 0.83905
# Overall test-set accuracy of the multinomial model
accuracy <- conf_matrix$overall["Accuracy"]
print(paste("Accuracy: ", accuracy))
## [1] "Accuracy: 0.852727272727273"
Accuracy égale à 0,85.
# Candidate neighborhood sizes: k = 1, ..., 20
tuneGrid <- expand.grid(k = 1:20)
set.seed(123)
# Fit a k-nearest-neighbours classifier, tuning k by cross-validation
knn_model <- train(
  label ~ .,
  data      = data.train,
  method    = "knn",
  tuneGrid  = tuneGrid,   # tries every k from 1 to 20
  trControl = ctrl
)
# Value of k retained by cross-validation
best_k <- knn_model$bestTune
print(best_k)
## k
## 1 1
# Predict with the tuned k-NN model on the test set
pred_knn <- predict(knn_model, newdata = data.test)
# Confusion matrix and derived statistics: compute once and print the
# stored object (the original recomputed confusionMatrix() a second time
# just to display it).
conf_matrix <- confusionMatrix(pred_knn, data.test$label)
print(conf_matrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction apple banana blackgram chickpea coconut coffee cotton grapes jute
## apple 25 0 0 0 0 0 0 0 0
## banana 0 25 0 0 0 0 0 0 0
## blackgram 0 0 25 0 0 0 0 0 0
## chickpea 0 0 0 25 0 0 0 0 0
## coconut 0 0 0 0 25 0 0 0 0
## coffee 0 0 0 0 0 25 0 0 0
## cotton 0 0 0 0 0 0 24 0 0
## grapes 0 0 0 0 0 0 0 25 0
## jute 0 0 0 0 0 0 0 0 19
## kidneybeans 0 0 0 0 0 0 0 0 0
## lentil 0 0 0 0 0 0 0 0 0
## maize 0 0 0 0 0 0 1 0 0
## mango 0 0 0 0 0 0 0 0 0
## mothbeans 0 0 0 0 0 0 0 0 0
## mungbean 0 0 0 0 0 0 0 0 0
## muskmelon 0 0 0 0 0 0 0 0 0
## orange 0 0 0 0 0 0 0 0 0
## papaya 0 0 0 0 0 0 0 0 1
## pigeonpeas 0 0 0 0 0 0 0 0 0
## pomegranate 0 0 0 0 0 0 0 0 0
## rice 0 0 0 0 0 0 0 0 5
## watermelon 0 0 0 0 0 0 0 0 0
## Reference
## Prediction kidneybeans lentil maize mango mothbeans mungbean muskmelon
## apple 0 0 0 0 0 0 0
## banana 0 0 0 0 0 0 0
## blackgram 0 0 0 0 0 0 0
## chickpea 0 0 0 0 0 0 0
## coconut 0 0 0 0 0 0 0
## coffee 0 0 0 0 0 0 0
## cotton 0 0 0 0 0 0 0
## grapes 0 0 0 0 0 0 0
## jute 0 0 0 0 0 0 0
## kidneybeans 25 0 0 0 0 0 0
## lentil 0 25 0 0 2 0 0
## maize 0 0 25 0 0 0 0
## mango 0 0 0 25 0 0 0
## mothbeans 0 0 0 0 23 0 0
## mungbean 0 0 0 0 0 25 0
## muskmelon 0 0 0 0 0 0 25
## orange 0 0 0 0 0 0 0
## papaya 0 0 0 0 0 0 0
## pigeonpeas 0 0 0 0 0 0 0
## pomegranate 0 0 0 0 0 0 0
## rice 0 0 0 0 0 0 0
## watermelon 0 0 0 0 0 0 0
## Reference
## Prediction orange papaya pigeonpeas pomegranate rice watermelon
## apple 0 0 0 0 0 0
## banana 0 0 0 0 0 0
## blackgram 0 0 0 0 0 0
## chickpea 0 0 0 0 0 0
## coconut 0 0 0 0 0 0
## coffee 0 0 0 0 0 0
## cotton 0 0 0 0 0 0
## grapes 0 0 0 0 0 0
## jute 0 0 0 0 1 0
## kidneybeans 0 0 0 0 0 0
## lentil 0 0 0 0 0 0
## maize 0 0 0 0 0 0
## mango 0 0 0 0 0 0
## mothbeans 0 0 0 0 0 0
## mungbean 0 0 0 0 0 0
## muskmelon 0 0 0 0 0 0
## orange 25 0 0 0 0 0
## papaya 0 24 0 0 0 0
## pigeonpeas 0 0 25 0 0 0
## pomegranate 0 0 0 25 0 0
## rice 0 1 0 0 24 0
## watermelon 0 0 0 0 0 25
##
## Overall Statistics
##
## Accuracy : 0.98
## 95% CI : (0.9645, 0.99)
## No Information Rate : 0.0455
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.979
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: apple Class: banana Class: blackgram
## Sensitivity 1.00000 1.00000 1.00000
## Specificity 1.00000 1.00000 1.00000
## Pos Pred Value 1.00000 1.00000 1.00000
## Neg Pred Value 1.00000 1.00000 1.00000
## Prevalence 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.04545 0.04545
## Detection Prevalence 0.04545 0.04545 0.04545
## Balanced Accuracy 1.00000 1.00000 1.00000
## Class: chickpea Class: coconut Class: coffee Class: cotton
## Sensitivity 1.00000 1.00000 1.00000 0.96000
## Specificity 1.00000 1.00000 1.00000 1.00000
## Pos Pred Value 1.00000 1.00000 1.00000 1.00000
## Neg Pred Value 1.00000 1.00000 1.00000 0.99810
## Prevalence 0.04545 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.04545 0.04545 0.04364
## Detection Prevalence 0.04545 0.04545 0.04545 0.04364
## Balanced Accuracy 1.00000 1.00000 1.00000 0.98000
## Class: grapes Class: jute Class: kidneybeans Class: lentil
## Sensitivity 1.00000 0.76000 1.00000 1.00000
## Specificity 1.00000 0.99810 1.00000 0.99619
## Pos Pred Value 1.00000 0.95000 1.00000 0.92593
## Neg Pred Value 1.00000 0.98868 1.00000 1.00000
## Prevalence 0.04545 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.03455 0.04545 0.04545
## Detection Prevalence 0.04545 0.03636 0.04545 0.04909
## Balanced Accuracy 1.00000 0.87905 1.00000 0.99810
## Class: maize Class: mango Class: mothbeans Class: mungbean
## Sensitivity 1.00000 1.00000 0.92000 1.00000
## Specificity 0.99810 1.00000 1.00000 1.00000
## Pos Pred Value 0.96154 1.00000 1.00000 1.00000
## Neg Pred Value 1.00000 1.00000 0.99620 1.00000
## Prevalence 0.04545 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.04545 0.04182 0.04545
## Detection Prevalence 0.04727 0.04545 0.04182 0.04545
## Balanced Accuracy 0.99905 1.00000 0.96000 1.00000
## Class: muskmelon Class: orange Class: papaya
## Sensitivity 1.00000 1.00000 0.96000
## Specificity 1.00000 1.00000 0.99810
## Pos Pred Value 1.00000 1.00000 0.96000
## Neg Pred Value 1.00000 1.00000 0.99810
## Prevalence 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.04545 0.04364
## Detection Prevalence 0.04545 0.04545 0.04545
## Balanced Accuracy 1.00000 1.00000 0.97905
## Class: pigeonpeas Class: pomegranate Class: rice
## Sensitivity 1.00000 1.00000 0.96000
## Specificity 1.00000 1.00000 0.98857
## Pos Pred Value 1.00000 1.00000 0.80000
## Neg Pred Value 1.00000 1.00000 0.99808
## Prevalence 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.04545 0.04364
## Detection Prevalence 0.04545 0.04545 0.05455
## Balanced Accuracy 1.00000 1.00000 0.97429
## Class: watermelon
## Sensitivity 1.00000
## Specificity 1.00000
## Pos Pred Value 1.00000
## Neg Pred Value 1.00000
## Prevalence 0.04545
## Detection Rate 0.04545
## Detection Prevalence 0.04545
## Balanced Accuracy 1.00000
# Overall test-set accuracy of the k-NN model
accuracy <- conf_matrix$overall["Accuracy"]
print(paste("Accuracy: ", accuracy))
## [1] "Accuracy: 0.98"
Grâce à la validation croisée, plusieurs valeurs de k ont été testées. Un k optimal qui maximise l’accuracy sans risquer de sur-ajuster le modèle aux données d’entraînement a été choisi. Il prend la valeur de 1 ; cela signifie que le modèle utilise uniquement le plus proche voisin pour faire ses prédictions.
Premièrement, on a vu précédemment que certaines classes semblent très proches les unes des autres en termes de variables explicatives. Cela signifie qu’il existe des régions dans l’espace des variables où les observations de classes différentes sont fortement entremêlées. En choisissant k = 1, le modèle peut mieux s’adapter à ces subtilités locales en se basant uniquement sur le point le plus proche, sans être influencé par des voisins appartenant à d’autres classes.
Deuxièmement, le grand nombre de classes ajoute de la complexité, en particulier dans les zones où plusieurs classes coexistent à proximité dans l’espace des caractéristiques. Les frontières entre les classes peuvent donc être très complexes. Un k plus grand pourrait créer un effet de lissage, en mélangeant les points de différentes classes, ce qui pourrait mener à des erreurs de classification.
Le modèle Knn possède une accuracy égale à 0,98. C’est un très bon résultat étant donné le grand nombre de classes.
Réalisé sous Python
Lorsque la multicollinéarité est présente, elle peut conduire à des informations redondantes dans l’ensemble de données. L’algorithme peut alors rencontrer des difficultés à distinguer les différentes variables et le pouvoir prédictif du modèle peut ainsi être biaisé (Japa et al., 2019). Nous réalisons donc une matrice de corrélation pour identifier les relations de dépendance entre les variables de notre jeu de données et ainsi détecter si certaines variables pourraient éventuellement être supprimées afin de limiter la redondance.
# No space between a function name and its opening parenthesis
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::lift() masks caret::lift()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggcorrplot)
# Correlation matrix of the numeric predictors (label column excluded),
# rounded to one decimal and displayed as a lower-triangle heat map
dta[, -8] %>%
  cor() %>%
  round(1) %>%
  ggcorrplot(type = "lower", lab = TRUE)
On observe ici que les variables Potassium et Phosphore sont fortement corrélées entre elles, avec un coefficient de corrélation égal à 0,7.
# Remove the phosphorus variable (column 2), highly correlated with potassium
data.trainP <- data.train[, -2]
data.testP  <- data.test[, -2]
set.seed(123)
ctrl <- trainControl(method = "cv", number = 10)  # 10-fold cross-validation
# Refit the multinomial regression without P, with cross-validation
reg_model_P <- train(label ~ ., data = data.trainP, method = "multinom",
                     trControl = ctrl)
## # weights: 176 (147 variable)
## initial value 4596.380128
## iter 10 value 2581.262653
## iter 20 value 2305.797351
## iter 30 value 2074.935867
## iter 40 value 1959.007042
## iter 50 value 1859.721927
## iter 60 value 1734.253971
## iter 70 value 1667.757096
## iter 80 value 1647.175884
## iter 90 value 1613.739821
## iter 100 value 1478.656285
## final value 1478.656285
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4596.380128
## iter 10 value 2581.273826
## iter 20 value 2305.828982
## iter 30 value 2075.020772
## iter 40 value 1959.136209
## iter 50 value 1859.827445
## iter 60 value 1735.985118
## iter 70 value 1668.265148
## iter 80 value 1644.843006
## iter 90 value 1611.327532
## iter 100 value 1557.781005
## final value 1557.781005
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4596.380128
## iter 10 value 2581.262664
## iter 20 value 2305.797382
## iter 30 value 2074.935952
## iter 40 value 1959.007172
## iter 50 value 1859.722026
## iter 60 value 1734.252651
## iter 70 value 1667.753129
## iter 80 value 1647.157070
## iter 90 value 1613.785453
## iter 100 value 1479.324916
## final value 1479.324916
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4587.107001
## iter 10 value 2596.849764
## iter 20 value 2035.338428
## iter 30 value 1778.972962
## iter 40 value 1647.012872
## iter 50 value 1576.374650
## iter 60 value 1449.206271
## iter 70 value 1408.961614
## iter 80 value 1392.460073
## iter 90 value 1363.474723
## iter 100 value 1231.786137
## final value 1231.786137
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4587.107001
## iter 10 value 2596.858034
## iter 20 value 2035.343245
## iter 30 value 1778.894070
## iter 40 value 1646.736668
## iter 50 value 1573.324578
## iter 60 value 1460.952065
## iter 70 value 1422.660426
## iter 80 value 1407.394296
## iter 90 value 1370.342571
## iter 100 value 1228.651096
## final value 1228.651096
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4587.107001
## iter 10 value 2596.849773
## iter 20 value 2035.338432
## iter 30 value 1778.972882
## iter 40 value 1647.012581
## iter 50 value 1576.376228
## iter 60 value 1449.211329
## iter 70 value 1408.967852
## iter 80 value 1392.469250
## iter 90 value 1363.477627
## iter 100 value 1231.676259
## final value 1231.676259
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4580.924916
## iter 10 value 2652.801625
## iter 20 value 2184.412608
## iter 30 value 1885.597894
## iter 40 value 1742.586954
## iter 50 value 1667.202787
## iter 60 value 1548.494164
## iter 70 value 1527.781041
## iter 80 value 1510.485789
## iter 90 value 1452.157996
## iter 100 value 1070.709644
## final value 1070.709644
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4580.924916
## iter 10 value 2652.811032
## iter 20 value 2184.432356
## iter 30 value 1885.657852
## iter 40 value 1742.711096
## iter 50 value 1667.379798
## iter 60 value 1551.199220
## iter 70 value 1529.552241
## iter 80 value 1511.664752
## iter 90 value 1459.052752
## iter 100 value 1306.580862
## final value 1306.580862
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4580.924916
## iter 10 value 2652.801635
## iter 20 value 2184.412627
## iter 30 value 1885.597954
## iter 40 value 1742.587078
## iter 50 value 1667.202943
## iter 60 value 1548.496678
## iter 70 value 1527.799870
## iter 80 value 1510.422714
## iter 90 value 1452.028765
## iter 100 value 1074.606155
## final value 1074.606155
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4593.289086
## iter 10 value 2633.270964
## iter 20 value 2205.246760
## iter 30 value 2012.339517
## iter 40 value 1891.870795
## iter 50 value 1832.500019
## iter 60 value 1740.943776
## iter 70 value 1692.331021
## iter 80 value 1670.221773
## iter 90 value 1627.482186
## iter 100 value 1561.282609
## final value 1561.282609
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4593.289086
## iter 10 value 2633.283860
## iter 20 value 2205.275569
## iter 30 value 2012.142024
## iter 40 value 1887.003900
## iter 50 value 1805.936730
## iter 60 value 1724.505068
## iter 70 value 1683.490734
## iter 80 value 1659.966727
## iter 90 value 1620.530403
## iter 100 value 1542.714725
## final value 1542.714725
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4593.289086
## iter 10 value 2633.270977
## iter 20 value 2205.246788
## iter 30 value 2012.339328
## iter 40 value 1891.867476
## iter 50 value 1832.470634
## iter 60 value 1740.846307
## iter 70 value 1692.239629
## iter 80 value 1670.210589
## iter 90 value 1627.238393
## iter 100 value 1559.828424
## final value 1559.828424
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4584.015958
## iter 10 value 2615.104883
## iter 20 value 2024.687500
## iter 30 value 1778.799465
## iter 40 value 1687.079869
## iter 50 value 1601.924396
## iter 60 value 1496.179208
## iter 70 value 1469.592625
## iter 80 value 1454.568001
## iter 90 value 1433.795793
## iter 100 value 1339.812178
## final value 1339.812178
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4584.015958
## iter 10 value 2615.114159
## iter 20 value 2024.687488
## iter 30 value 1778.938699
## iter 40 value 1690.517867
## iter 50 value 1607.548056
## iter 60 value 1504.135079
## iter 70 value 1477.620986
## iter 80 value 1458.271882
## iter 90 value 1439.048301
## iter 100 value 1323.867856
## final value 1323.867856
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4584.015958
## iter 10 value 2615.104892
## iter 20 value 2024.687500
## iter 30 value 1778.799591
## iter 40 value 1687.079698
## iter 50 value 1601.925408
## iter 60 value 1496.180833
## iter 70 value 1469.593975
## iter 80 value 1454.586813
## iter 90 value 1433.761568
## iter 100 value 1346.901478
## final value 1346.901478
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4602.562213
## iter 10 value 2583.821990
## iter 20 value 2199.654326
## iter 30 value 1983.354658
## iter 40 value 1845.177885
## iter 50 value 1790.643659
## iter 60 value 1669.912797
## iter 70 value 1631.015595
## iter 80 value 1609.642383
## iter 90 value 1575.524633
## iter 100 value 1505.509238
## final value 1505.509238
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4602.562213
## iter 10 value 2583.832956
## iter 20 value 2199.684431
## iter 30 value 1983.420010
## iter 40 value 1845.406446
## iter 50 value 1791.348857
## iter 60 value 1665.506832
## iter 70 value 1626.316965
## iter 80 value 1609.255749
## iter 90 value 1576.938745
## iter 100 value 1508.965666
## final value 1508.965666
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4602.562213
## iter 10 value 2583.822001
## iter 20 value 2199.654356
## iter 30 value 1983.354723
## iter 40 value 1845.178112
## iter 50 value 1790.646091
## iter 60 value 1669.913332
## iter 70 value 1630.401223
## iter 80 value 1607.139736
## iter 90 value 1571.409196
## iter 100 value 1502.529055
## final value 1502.529055
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4602.562213
## iter 10 value 2645.405990
## iter 20 value 2257.474360
## iter 30 value 2018.337493
## iter 40 value 1907.524940
## iter 50 value 1806.590591
## iter 60 value 1666.534564
## iter 70 value 1613.320175
## iter 80 value 1581.923993
## iter 90 value 1543.822925
## iter 100 value 1488.469218
## final value 1488.469218
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4602.562213
## iter 10 value 2645.415299
## iter 20 value 2257.500097
## iter 30 value 2018.416925
## iter 40 value 1907.775854
## iter 50 value 1806.984629
## iter 60 value 1667.536793
## iter 70 value 1614.527389
## iter 80 value 1581.794787
## iter 90 value 1536.983785
## iter 100 value 1476.064934
## final value 1476.064934
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4602.562213
## iter 10 value 2645.405999
## iter 20 value 2257.474386
## iter 30 value 2018.337572
## iter 40 value 1907.525191
## iter 50 value 1806.590984
## iter 60 value 1666.535558
## iter 70 value 1613.320902
## iter 80 value 1581.946482
## iter 90 value 1543.918645
## iter 100 value 1488.473983
## final value 1488.473983
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4584.015958
## iter 10 value 2572.526511
## iter 20 value 2188.673106
## iter 30 value 2005.624767
## iter 40 value 1905.044071
## iter 50 value 1817.174264
## iter 60 value 1736.198442
## iter 70 value 1693.306780
## iter 80 value 1658.955532
## iter 90 value 1625.181723
## iter 100 value 1550.510826
## final value 1550.510826
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4584.015958
## iter 10 value 2572.537619
## iter 20 value 2188.697258
## iter 30 value 2005.800316
## iter 40 value 1905.028554
## iter 50 value 1834.097625
## iter 60 value 1743.249665
## iter 70 value 1696.900617
## iter 80 value 1664.950248
## iter 90 value 1626.865263
## iter 100 value 1566.033031
## final value 1566.033031
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4584.015958
## iter 10 value 2572.526522
## iter 20 value 2188.673131
## iter 30 value 2005.624942
## iter 40 value 1905.044041
## iter 50 value 1817.187875
## iter 60 value 1736.247730
## iter 70 value 1693.334855
## iter 80 value 1658.929084
## iter 90 value 1625.126510
## iter 100 value 1541.475704
## final value 1541.475704
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4593.289086
## iter 10 value 2568.071276
## iter 20 value 2210.689265
## iter 30 value 2013.237200
## iter 40 value 1849.248052
## iter 50 value 1773.652246
## iter 60 value 1642.786954
## iter 70 value 1585.431921
## iter 80 value 1563.440038
## iter 90 value 1537.306857
## iter 100 value 1484.906564
## final value 1484.906564
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4593.289086
## iter 10 value 2568.081946
## iter 20 value 2210.714958
## iter 30 value 2013.230658
## iter 40 value 1848.977876
## iter 50 value 1775.340275
## iter 60 value 1649.724025
## iter 70 value 1590.965443
## iter 80 value 1568.232494
## iter 90 value 1540.531252
## iter 100 value 1477.232124
## final value 1477.232124
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4593.289086
## iter 10 value 2568.071287
## iter 20 value 2210.689291
## iter 30 value 2013.237193
## iter 40 value 1849.247771
## iter 50 value 1773.650601
## iter 60 value 1642.765828
## iter 70 value 1585.444010
## iter 80 value 1563.423079
## iter 90 value 1535.374384
## iter 100 value 1470.097032
## final value 1470.097032
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4577.833873
## iter 10 value 2547.112704
## iter 20 value 2191.923548
## iter 30 value 1974.756981
## iter 40 value 1827.063080
## iter 50 value 1753.307488
## iter 60 value 1645.184246
## iter 70 value 1611.964227
## iter 80 value 1591.018382
## iter 90 value 1565.703793
## iter 100 value 1469.697365
## final value 1469.697365
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4577.833873
## iter 10 value 2547.123629
## iter 20 value 2191.677206
## iter 30 value 1973.881199
## iter 40 value 1827.029255
## iter 50 value 1755.342976
## iter 60 value 1644.552940
## iter 70 value 1613.600411
## iter 80 value 1595.679554
## iter 90 value 1574.279406
## iter 100 value 1458.482066
## final value 1458.482066
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4577.833873
## iter 10 value 2547.112715
## iter 20 value 2191.923301
## iter 30 value 1974.756038
## iter 40 value 1827.062886
## iter 50 value 1753.311071
## iter 60 value 1645.184513
## iter 70 value 1611.963642
## iter 80 value 1590.950159
## iter 90 value 1565.586360
## iter 100 value 1469.869023
## final value 1469.869023
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 5100.220048
## iter 10 value 2755.005958
## iter 20 value 2295.412976
## iter 30 value 2086.522665
## iter 40 value 1958.583059
## iter 50 value 1882.771233
## iter 60 value 1751.714172
## iter 70 value 1695.216129
## iter 80 value 1664.602373
## iter 90 value 1635.954381
## iter 100 value 1550.256507
## final value 1550.256507
## stopped after 100 iterations
# Predict crop labels on the hold-out set (P variable removed)
pred_reg_P <- predict(reg_model_P, newdata = data.testP)
# Confusion matrix and per-class statistics; print the stored object
# instead of recomputing the whole matrix a second time
conf_matrix_P <- confusionMatrix(pred_reg_P, data.testP$label)
print(conf_matrix_P)
## Confusion Matrix and Statistics
##
## Reference
## Prediction apple banana blackgram chickpea coconut coffee cotton grapes jute
## apple 25 0 0 0 0 0 0 2 0
## banana 0 22 0 0 0 0 2 2 2
## blackgram 0 0 22 0 0 0 0 0 0
## chickpea 0 0 0 24 0 1 0 2 0
## coconut 0 0 0 0 20 0 0 0 3
## coffee 0 0 0 1 0 17 0 1 0
## cotton 0 0 0 0 0 0 20 0 0
## grapes 0 0 0 0 0 0 0 17 0
## jute 0 0 0 0 0 0 0 0 8
## kidneybeans 0 0 0 0 0 0 0 0 0
## lentil 0 0 2 0 0 0 0 0 0
## maize 0 0 0 0 0 7 0 0 1
## mango 0 0 0 0 0 0 0 1 0
## mothbeans 0 0 1 0 0 0 0 0 0
## mungbean 0 0 0 0 0 0 0 0 0
## muskmelon 0 0 0 0 0 0 0 0 0
## orange 0 0 0 0 0 0 0 0 0
## papaya 0 2 0 0 2 0 0 0 1
## pigeonpeas 0 0 0 0 0 0 0 0 0
## pomegranate 0 0 0 0 3 0 0 0 0
## rice 0 0 0 0 0 0 0 0 10
## watermelon 0 1 0 0 0 0 3 0 0
## Reference
## Prediction kidneybeans lentil maize mango mothbeans mungbean muskmelon
## apple 0 0 0 0 0 0 0
## banana 0 0 0 0 0 0 0
## blackgram 0 5 1 1 3 0 0
## chickpea 0 0 0 0 0 0 0
## coconut 0 0 0 0 0 0 0
## coffee 0 0 0 0 0 0 0
## cotton 0 0 1 0 0 0 0
## grapes 0 0 0 0 0 0 0
## jute 0 0 0 0 0 0 0
## kidneybeans 22 0 0 0 0 0 0
## lentil 0 8 2 0 2 0 0
## maize 0 0 15 0 0 0 0
## mango 0 0 0 24 7 0 0
## mothbeans 1 3 0 0 12 0 0
## mungbean 0 9 0 0 0 25 0
## muskmelon 0 0 1 0 0 0 25
## orange 0 0 1 0 0 0 0
## papaya 0 0 0 0 0 0 0
## pigeonpeas 2 0 0 0 1 0 0
## pomegranate 0 0 0 0 0 0 0
## rice 0 0 0 0 0 0 0
## watermelon 0 0 4 0 0 0 0
## Reference
## Prediction orange papaya pigeonpeas pomegranate rice watermelon
## apple 0 0 0 0 0 0
## banana 0 0 0 0 0 0
## blackgram 0 0 1 0 0 0
## chickpea 0 0 0 0 0 0
## coconut 1 0 1 1 1 0
## coffee 0 0 0 0 0 0
## cotton 0 0 0 0 0 1
## grapes 0 0 0 0 0 0
## jute 0 0 0 0 1 0
## kidneybeans 0 0 6 0 0 0
## lentil 0 0 0 2 0 0
## maize 0 0 0 0 0 0
## mango 0 1 14 0 0 0
## mothbeans 0 0 0 0 0 0
## mungbean 0 2 0 0 0 0
## muskmelon 0 2 0 0 0 6
## orange 24 0 0 0 0 0
## papaya 0 14 0 1 0 0
## pigeonpeas 0 0 3 0 0 0
## pomegranate 0 1 0 21 0 0
## rice 0 4 0 0 23 0
## watermelon 0 1 0 0 0 18
##
## Overall Statistics
##
## Accuracy : 0.7436
## 95% CI : (0.705, 0.7796)
## No Information Rate : 0.0455
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7314
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: apple Class: banana Class: blackgram
## Sensitivity 1.00000 0.88000 0.88000
## Specificity 0.99619 0.98857 0.97905
## Pos Pred Value 0.92593 0.78571 0.66667
## Neg Pred Value 1.00000 0.99425 0.99420
## Prevalence 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.04000 0.04000
## Detection Prevalence 0.04909 0.05091 0.06000
## Balanced Accuracy 0.99810 0.93429 0.92952
## Class: chickpea Class: coconut Class: coffee Class: cotton
## Sensitivity 0.96000 0.80000 0.68000 0.80000
## Specificity 0.99429 0.98667 0.99619 0.99619
## Pos Pred Value 0.88889 0.74074 0.89474 0.90909
## Neg Pred Value 0.99809 0.99044 0.98493 0.99053
## Prevalence 0.04545 0.04545 0.04545 0.04545
## Detection Rate 0.04364 0.03636 0.03091 0.03636
## Detection Prevalence 0.04909 0.04909 0.03455 0.04000
## Balanced Accuracy 0.97714 0.89333 0.83810 0.89810
## Class: grapes Class: jute Class: kidneybeans Class: lentil
## Sensitivity 0.68000 0.32000 0.88000 0.32000
## Specificity 1.00000 0.99810 0.98857 0.98476
## Pos Pred Value 1.00000 0.88889 0.78571 0.50000
## Neg Pred Value 0.98499 0.96858 0.99425 0.96816
## Prevalence 0.04545 0.04545 0.04545 0.04545
## Detection Rate 0.03091 0.01455 0.04000 0.01455
## Detection Prevalence 0.03091 0.01636 0.05091 0.02909
## Balanced Accuracy 0.84000 0.65905 0.93429 0.65238
## Class: maize Class: mango Class: mothbeans Class: mungbean
## Sensitivity 0.60000 0.96000 0.48000 1.00000
## Specificity 0.98476 0.95619 0.99048 0.97905
## Pos Pred Value 0.65217 0.51064 0.70588 0.69444
## Neg Pred Value 0.98102 0.99801 0.97561 1.00000
## Prevalence 0.04545 0.04545 0.04545 0.04545
## Detection Rate 0.02727 0.04364 0.02182 0.04545
## Detection Prevalence 0.04182 0.08545 0.03091 0.06545
## Balanced Accuracy 0.79238 0.95810 0.73524 0.98952
## Class: muskmelon Class: orange Class: papaya
## Sensitivity 1.00000 0.96000 0.56000
## Specificity 0.98286 0.99810 0.98857
## Pos Pred Value 0.73529 0.96000 0.70000
## Neg Pred Value 1.00000 0.99810 0.97925
## Prevalence 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.04364 0.02545
## Detection Prevalence 0.06182 0.04545 0.03636
## Balanced Accuracy 0.99143 0.97905 0.77429
## Class: pigeonpeas Class: pomegranate Class: rice
## Sensitivity 0.120000 0.84000 0.92000
## Specificity 0.994286 0.99238 0.97333
## Pos Pred Value 0.500000 0.84000 0.62162
## Neg Pred Value 0.959559 0.99238 0.99610
## Prevalence 0.045455 0.04545 0.04545
## Detection Rate 0.005455 0.03818 0.04182
## Detection Prevalence 0.010909 0.04545 0.06727
## Balanced Accuracy 0.557143 0.91619 0.94667
## Class: watermelon
## Sensitivity 0.72000
## Specificity 0.98286
## Pos Pred Value 0.66667
## Neg Pred Value 0.98662
## Prevalence 0.04545
## Detection Rate 0.03273
## Detection Prevalence 0.04909
## Balanced Accuracy 0.85143
# Pull the overall accuracy out of the stored confusion matrix and echo it
accuracy_P <- conf_matrix_P$overall['Accuracy']
acc_label_P <- paste("Accuracy: ", accuracy_P)
print(acc_label_P)
## [1] "Accuracy: 0.743636363636364"
## Drop the K (potassium) variable — column 3 of the data set
data.trainK <- data.train[,-3]
data.testK <- data.test[,-3]
set.seed(123)  # reproducible cross-validation fold assignment
# 10-fold cross-validation control (this is the resampling scheme,
# not a KNN model — the original comment was misleading)
ctrl <- trainControl(method = "cv", number = 10) # 10-fold cross-validation
# Fit the multinomial logistic regression with cross-validation;
# trace = FALSE is forwarded to nnet::multinom and silences the
# hundreds of per-iteration log lines it would otherwise print
reg_model_K <- train(label ~ ., data = data.trainK, method = "multinom",
                     trControl = ctrl, trace = FALSE)
## # weights: 176 (147 variable)
## initial value 4596.380128
## iter 10 value 2695.894232
## iter 20 value 1744.596009
## iter 30 value 1397.272764
## iter 40 value 1314.882191
## iter 50 value 1194.549499
## iter 60 value 1155.698430
## iter 70 value 1127.480739
## iter 80 value 1082.694732
## iter 90 value 1035.933107
## iter 100 value 887.945470
## final value 887.945470
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4596.380128
## iter 10 value 2695.923982
## iter 20 value 1744.654996
## iter 30 value 1397.375394
## iter 40 value 1314.901071
## iter 50 value 1195.229341
## iter 60 value 1156.541557
## iter 70 value 1128.347181
## iter 80 value 1084.198523
## iter 90 value 1033.549386
## iter 100 value 865.784887
## final value 865.784887
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4596.380128
## iter 10 value 2695.894261
## iter 20 value 1744.596068
## iter 30 value 1397.272867
## iter 40 value 1314.882209
## iter 50 value 1194.550177
## iter 60 value 1155.699275
## iter 70 value 1127.481607
## iter 80 value 1082.691308
## iter 90 value 1035.925072
## iter 100 value 887.805612
## final value 887.805612
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4587.107001
## iter 10 value 2589.818679
## iter 20 value 2063.598266
## iter 30 value 1914.271224
## iter 40 value 1826.183296
## iter 50 value 1713.642011
## iter 60 value 1592.254587
## iter 70 value 1568.400766
## iter 80 value 1548.516590
## iter 90 value 1508.292826
## iter 100 value 1330.466991
## final value 1330.466991
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4587.107001
## iter 10 value 2589.827868
## iter 20 value 2063.631011
## iter 30 value 1914.255213
## iter 40 value 1826.032181
## iter 50 value 1714.772699
## iter 60 value 1593.795704
## iter 70 value 1568.995577
## iter 80 value 1548.318271
## iter 90 value 1507.711737
## iter 100 value 1332.447450
## final value 1332.447450
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4587.107001
## iter 10 value 2589.818688
## iter 20 value 2063.598299
## iter 30 value 1914.271208
## iter 40 value 1826.183144
## iter 50 value 1713.643223
## iter 60 value 1592.255968
## iter 70 value 1568.401066
## iter 80 value 1548.514345
## iter 90 value 1508.264585
## iter 100 value 1332.595081
## final value 1332.595081
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4580.924916
## iter 10 value 2490.339841
## iter 20 value 2030.132082
## iter 30 value 1793.393240
## iter 40 value 1716.022992
## iter 50 value 1601.048471
## iter 60 value 1503.162184
## iter 70 value 1482.412496
## iter 80 value 1455.472095
## iter 90 value 1429.329699
## iter 100 value 1318.430535
## final value 1318.430535
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4580.924916
## iter 10 value 2490.348658
## iter 20 value 2030.152366
## iter 30 value 1793.458577
## iter 40 value 1716.125834
## iter 50 value 1601.225658
## iter 60 value 1503.912294
## iter 70 value 1483.326583
## iter 80 value 1456.636611
## iter 90 value 1428.716092
## iter 100 value 1322.141026
## final value 1322.141026
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4580.924916
## iter 10 value 2490.339850
## iter 20 value 2030.132102
## iter 30 value 1793.393305
## iter 40 value 1716.023095
## iter 50 value 1601.048652
## iter 60 value 1503.162939
## iter 70 value 1482.413419
## iter 80 value 1455.473284
## iter 90 value 1429.331045
## iter 100 value 1318.505575
## final value 1318.505575
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4593.289086
## iter 10 value 2476.376151
## iter 20 value 2054.643763
## iter 30 value 1848.686357
## iter 40 value 1742.222535
## iter 50 value 1639.605222
## iter 60 value 1552.784567
## iter 70 value 1521.850834
## iter 80 value 1499.397127
## iter 90 value 1469.388328
## iter 100 value 1304.592753
## final value 1304.592753
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4593.289086
## iter 10 value 2476.385434
## iter 20 value 2054.675735
## iter 30 value 1848.705516
## iter 40 value 1742.306393
## iter 50 value 1638.511936
## iter 60 value 1552.490320
## iter 70 value 1520.315920
## iter 80 value 1500.489811
## iter 90 value 1471.048246
## iter 100 value 1308.278374
## final value 1308.278374
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4593.289086
## iter 10 value 2476.376160
## iter 20 value 2054.643795
## iter 30 value 1848.686376
## iter 40 value 1742.222620
## iter 50 value 1639.604034
## iter 60 value 1552.784070
## iter 70 value 1521.851438
## iter 80 value 1499.399204
## iter 90 value 1469.385937
## iter 100 value 1307.054926
## final value 1307.054926
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4584.015958
## iter 10 value 2454.403572
## iter 20 value 1971.957272
## iter 30 value 1787.031237
## iter 40 value 1689.278863
## iter 50 value 1566.875772
## iter 60 value 1519.948288
## iter 70 value 1501.318245
## iter 80 value 1477.604126
## iter 90 value 1452.502160
## iter 100 value 1347.096200
## final value 1347.096200
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4584.015958
## iter 10 value 2454.412692
## iter 20 value 1971.972657
## iter 30 value 1786.995336
## iter 40 value 1689.263748
## iter 50 value 1567.147933
## iter 60 value 1511.115618
## iter 70 value 1494.613772
## iter 80 value 1472.571513
## iter 90 value 1456.361378
## iter 100 value 1228.385192
## final value 1228.385192
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4584.015958
## iter 10 value 2454.403581
## iter 20 value 1971.957287
## iter 30 value 1787.031202
## iter 40 value 1689.278848
## iter 50 value 1566.876042
## iter 60 value 1519.949170
## iter 70 value 1501.319296
## iter 80 value 1477.605144
## iter 90 value 1452.504059
## iter 100 value 1347.259070
## final value 1347.259070
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4602.562213
## iter 10 value 2598.121715
## iter 20 value 2025.176564
## iter 30 value 1800.748596
## iter 40 value 1677.825902
## iter 50 value 1581.771353
## iter 60 value 1485.699134
## iter 70 value 1437.866168
## iter 80 value 1421.744103
## iter 90 value 1380.903619
## iter 100 value 1285.165194
## final value 1285.165194
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4602.562213
## iter 10 value 2598.129205
## iter 20 value 2025.186888
## iter 30 value 1800.560813
## iter 40 value 1678.468373
## iter 50 value 1585.239469
## iter 60 value 1489.404980
## iter 70 value 1445.252887
## iter 80 value 1426.592957
## iter 90 value 1389.918148
## iter 100 value 1313.181924
## final value 1313.181924
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4602.562213
## iter 10 value 2598.121723
## iter 20 value 2025.176574
## iter 30 value 1800.748404
## iter 40 value 1677.826399
## iter 50 value 1581.770922
## iter 60 value 1485.692547
## iter 70 value 1437.864362
## iter 80 value 1421.737474
## iter 90 value 1380.883041
## iter 100 value 1284.703760
## final value 1284.703760
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4602.562213
## iter 10 value 2190.047802
## iter 20 value 1613.088849
## iter 30 value 1400.899174
## iter 40 value 1269.187701
## iter 50 value 1192.698575
## iter 60 value 1162.941423
## iter 70 value 1133.637730
## iter 80 value 1093.541888
## iter 90 value 1038.573280
## iter 100 value 964.225034
## final value 964.225034
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4602.562213
## iter 10 value 2190.071997
## iter 20 value 1613.143530
## iter 30 value 1400.991809
## iter 40 value 1269.356854
## iter 50 value 1193.003080
## iter 60 value 1163.494094
## iter 70 value 1134.799775
## iter 80 value 1095.511071
## iter 90 value 1046.453251
## iter 100 value 965.615290
## final value 965.615290
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4602.562213
## iter 10 value 2190.047826
## iter 20 value 1613.088903
## iter 30 value 1400.899267
## iter 40 value 1269.187871
## iter 50 value 1192.698875
## iter 60 value 1162.941967
## iter 70 value 1133.638918
## iter 80 value 1093.543900
## iter 90 value 1038.577238
## iter 100 value 964.223060
## final value 964.223060
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4584.015958
## iter 10 value 2155.245961
## iter 20 value 1588.414176
## iter 30 value 1402.248011
## iter 40 value 1289.551626
## iter 50 value 1198.090028
## iter 60 value 1160.326639
## iter 70 value 1135.602819
## iter 80 value 1087.163496
## iter 90 value 1026.113247
## iter 100 value 941.983496
## final value 941.983496
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4584.015958
## iter 10 value 2155.271478
## iter 20 value 1588.504806
## iter 30 value 1402.051554
## iter 40 value 1289.422144
## iter 50 value 1198.209866
## iter 60 value 1161.048049
## iter 70 value 1136.999718
## iter 80 value 1094.030046
## iter 90 value 1028.877363
## iter 100 value 970.271742
## final value 970.271742
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4584.015958
## iter 10 value 2155.245987
## iter 20 value 1588.414266
## iter 30 value 1402.247808
## iter 40 value 1289.551472
## iter 50 value 1198.090093
## iter 60 value 1160.327424
## iter 70 value 1135.612227
## iter 80 value 1087.237432
## iter 90 value 1025.803025
## iter 100 value 946.794211
## final value 946.794211
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4593.289086
## iter 10 value 2855.962837
## iter 20 value 2460.673575
## iter 30 value 2265.214298
## iter 40 value 2117.793992
## iter 50 value 2024.034955
## iter 60 value 1864.193171
## iter 70 value 1793.582222
## iter 80 value 1733.627131
## iter 90 value 1679.158874
## iter 100 value 1585.936249
## final value 1585.936249
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4593.289086
## iter 10 value 2855.976015
## iter 20 value 2460.707237
## iter 30 value 2266.493875
## iter 40 value 2118.128673
## iter 50 value 2023.035516
## iter 60 value 1874.487565
## iter 70 value 1779.114737
## iter 80 value 1732.560677
## iter 90 value 1666.742607
## iter 100 value 1574.800434
## final value 1574.800434
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4593.289086
## iter 10 value 2855.962850
## iter 20 value 2460.673609
## iter 30 value 2265.215497
## iter 40 value 2117.794265
## iter 50 value 2024.033605
## iter 60 value 1864.351827
## iter 70 value 1793.563004
## iter 80 value 1733.712738
## iter 90 value 1679.024645
## iter 100 value 1585.178300
## final value 1585.178300
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4577.833873
## iter 10 value 2875.010042
## iter 20 value 2533.133311
## iter 30 value 2302.349325
## iter 40 value 2180.048605
## iter 50 value 2123.134806
## iter 60 value 1999.350422
## iter 70 value 1915.091948
## iter 80 value 1868.083275
## iter 90 value 1777.738339
## iter 100 value 1675.732605
## final value 1675.732605
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4577.833873
## iter 10 value 2875.022599
## iter 20 value 2533.157368
## iter 30 value 2302.350707
## iter 40 value 2180.188688
## iter 50 value 2123.355644
## iter 60 value 1997.655819
## iter 70 value 1924.781001
## iter 80 value 1882.128515
## iter 90 value 1792.263035
## iter 100 value 1675.545240
## final value 1675.545240
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 4577.833873
## iter 10 value 2875.010055
## iter 20 value 2533.133335
## iter 30 value 2302.349327
## iter 40 value 2180.048744
## iter 50 value 2123.134782
## iter 60 value 1999.347059
## iter 70 value 1915.088738
## iter 80 value 1868.072987
## iter 90 value 1777.718078
## iter 100 value 1675.690073
## final value 1675.690073
## stopped after 100 iterations
## # weights: 176 (147 variable)
## initial value 5100.220048
## iter 10 value 2953.882688
## iter 20 value 2499.950087
## iter 30 value 2233.342045
## iter 40 value 2098.880785
## iter 50 value 2027.191028
## iter 60 value 1920.112516
## iter 70 value 1837.790497
## iter 80 value 1795.549633
## iter 90 value 1758.489062
## iter 100 value 1652.426752
## final value 1652.426752
## stopped after 100 iterations
# Predict crop labels on the hold-out set (K variable removed)
pred_reg_K <- predict(reg_model_K, newdata = data.testK)
# Confusion matrix and per-class statistics; print the stored object
# instead of recomputing the whole matrix a second time
conf_matrix_K <- confusionMatrix(pred_reg_K, data.testK$label)
print(conf_matrix_K)
## Confusion Matrix and Statistics
##
## Reference
## Prediction apple banana blackgram chickpea coconut coffee cotton grapes jute
## apple 20 0 0 0 0 0 0 0 0
## banana 0 24 0 0 0 0 1 0 0
## blackgram 0 0 16 0 0 0 0 0 0
## chickpea 0 0 0 25 0 3 0 0 0
## coconut 0 0 0 0 21 0 0 0 0
## coffee 0 0 0 0 0 22 0 0 0
## cotton 0 0 0 0 0 0 12 4 0
## grapes 5 0 0 0 0 0 0 21 0
## jute 0 0 0 0 0 0 0 0 19
## kidneybeans 0 0 0 0 0 0 0 0 0
## lentil 0 0 5 0 0 0 0 0 0
## maize 0 1 0 0 0 0 12 0 0
## mango 0 0 0 0 0 0 0 0 0
## mothbeans 0 0 4 0 0 0 0 0 0
## mungbean 0 0 0 0 0 0 0 0 0
## muskmelon 0 0 0 0 0 0 0 0 0
## orange 0 0 0 0 0 0 0 0 0
## papaya 0 0 0 0 0 0 0 0 0
## pigeonpeas 0 0 0 0 0 0 0 0 0
## pomegranate 0 0 0 0 4 0 0 0 0
## rice 0 0 0 0 0 0 0 0 6
## watermelon 0 0 0 0 0 0 0 0 0
## Reference
## Prediction kidneybeans lentil maize mango mothbeans mungbean muskmelon
## apple 0 0 0 0 0 0 0
## banana 0 0 1 0 0 0 0
## blackgram 0 1 0 0 4 0 0
## chickpea 6 0 4 0 0 0 0
## coconut 0 0 0 0 0 0 3
## coffee 0 0 0 0 0 0 0
## cotton 0 0 1 0 0 0 0
## grapes 0 0 0 0 0 0 0
## jute 0 0 0 0 0 0 0
## kidneybeans 19 0 0 0 0 0 0
## lentil 0 24 0 0 4 2 0
## maize 0 0 19 0 0 0 0
## mango 0 0 0 24 0 0 0
## mothbeans 0 0 0 0 13 0 0
## mungbean 0 0 0 0 0 23 0
## muskmelon 0 0 0 0 0 0 13
## orange 0 0 0 0 0 0 0
## papaya 0 0 0 0 3 0 0
## pigeonpeas 0 0 0 1 1 0 0
## pomegranate 0 0 0 0 0 0 0
## rice 0 0 0 0 0 0 0
## watermelon 0 0 0 0 0 0 9
## Reference
## Prediction orange papaya pigeonpeas pomegranate rice watermelon
## apple 0 1 0 0 0 0
## banana 0 0 2 0 0 0
## blackgram 0 0 0 0 0 0
## chickpea 0 0 1 0 0 0
## coconut 3 0 0 7 0 0
## coffee 0 0 0 0 0 0
## cotton 0 0 0 0 0 0
## grapes 0 0 0 0 0 0
## jute 0 1 0 0 1 0
## kidneybeans 0 0 7 0 0 0
## lentil 0 0 0 0 0 0
## maize 0 0 0 0 0 0
## mango 3 0 0 0 0 0
## mothbeans 0 0 0 0 0 0
## mungbean 0 3 0 0 0 0
## muskmelon 0 1 0 0 0 0
## orange 8 0 0 3 0 0
## papaya 1 14 0 0 0 0
## pigeonpeas 0 1 13 0 0 0
## pomegranate 10 0 0 15 0 0
## rice 0 4 2 0 24 0
## watermelon 0 0 0 0 0 25
##
## Overall Statistics
##
## Accuracy : 0.7527
## 95% CI : (0.7145, 0.7883)
## No Information Rate : 0.0455
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.741
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: apple Class: banana Class: blackgram
## Sensitivity 0.80000 0.96000 0.64000
## Specificity 0.99810 0.99238 0.99048
## Pos Pred Value 0.95238 0.85714 0.76190
## Neg Pred Value 0.99055 0.99808 0.98299
## Prevalence 0.04545 0.04545 0.04545
## Detection Rate 0.03636 0.04364 0.02909
## Detection Prevalence 0.03818 0.05091 0.03818
## Balanced Accuracy 0.89905 0.97619 0.81524
## Class: chickpea Class: coconut Class: coffee Class: cotton
## Sensitivity 1.00000 0.84000 0.88000 0.48000
## Specificity 0.97333 0.97524 1.00000 0.99048
## Pos Pred Value 0.64103 0.61765 1.00000 0.70588
## Neg Pred Value 1.00000 0.99225 0.99432 0.97561
## Prevalence 0.04545 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.03818 0.04000 0.02182
## Detection Prevalence 0.07091 0.06182 0.04000 0.03091
## Balanced Accuracy 0.98667 0.90762 0.94000 0.73524
## Class: grapes Class: jute Class: kidneybeans Class: lentil
## Sensitivity 0.84000 0.76000 0.76000 0.96000
## Specificity 0.99048 0.99619 0.98667 0.97905
## Pos Pred Value 0.80769 0.90476 0.73077 0.68571
## Neg Pred Value 0.99237 0.98866 0.98855 0.99806
## Prevalence 0.04545 0.04545 0.04545 0.04545
## Detection Rate 0.03818 0.03455 0.03455 0.04364
## Detection Prevalence 0.04727 0.03818 0.04727 0.06364
## Balanced Accuracy 0.91524 0.87810 0.87333 0.96952
## Class: maize Class: mango Class: mothbeans Class: mungbean
## Sensitivity 0.76000 0.96000 0.52000 0.92000
## Specificity 0.97524 0.99429 0.99238 0.99429
## Pos Pred Value 0.59375 0.88889 0.76471 0.88462
## Neg Pred Value 0.98842 0.99809 0.97749 0.99618
## Prevalence 0.04545 0.04545 0.04545 0.04545
## Detection Rate 0.03455 0.04364 0.02364 0.04182
## Detection Prevalence 0.05818 0.04909 0.03091 0.04727
## Balanced Accuracy 0.86762 0.97714 0.75619 0.95714
## Class: muskmelon Class: orange Class: papaya
## Sensitivity 0.52000 0.32000 0.56000
## Specificity 0.99810 0.99429 0.99238
## Pos Pred Value 0.92857 0.72727 0.77778
## Neg Pred Value 0.97761 0.96846 0.97932
## Prevalence 0.04545 0.04545 0.04545
## Detection Rate 0.02364 0.01455 0.02545
## Detection Prevalence 0.02545 0.02000 0.03273
## Balanced Accuracy 0.75905 0.65714 0.77619
## Class: pigeonpeas Class: pomegranate Class: rice
## Sensitivity 0.52000 0.60000 0.96000
## Specificity 0.99429 0.97333 0.97714
## Pos Pred Value 0.81250 0.51724 0.66667
## Neg Pred Value 0.97753 0.98081 0.99805
## Prevalence 0.04545 0.04545 0.04545
## Detection Rate 0.02364 0.02727 0.04364
## Detection Prevalence 0.02909 0.05273 0.06545
## Balanced Accuracy 0.75714 0.78667 0.96857
## Class: watermelon
## Sensitivity 1.00000
## Specificity 0.98286
## Pos Pred Value 0.73529
## Neg Pred Value 1.00000
## Prevalence 0.04545
## Detection Rate 0.04545
## Detection Prevalence 0.06182
## Balanced Accuracy 0.99143
# Extract the overall accuracy from the confusion-matrix object computed above
# (presumably the k-NN model, given the _K suffix — confirm against earlier chunk).
accuracy_K <- conf_matrix_K$overall['Accuracy']
# No trailing space inside the literal: paste() already inserts one through its
# default sep = " ". The original "Accuracy: " + sep produced a double space,
# inconsistent with the single space shown in the recorded output.
print(paste("Accuracy:", accuracy_K))
## [1] "Accuracy: 0.752727272727273"
# Obtain variable importance for the fitted regression model
# (caret::varImp on the multinomial model; scale = FALSE keeps the raw,
# unnormalized importance scores rather than rescaling them to 0-100).
importance <- varImp(reg_model, scale = FALSE)
print(importance)
## multinom variable importance
##
## Overall
## ph 19.1634
## temperature 2.2970
## K 1.1167
## P 0.9973
## humidity 0.9972
## rainfall 0.4806
## N 0.4462
library(nnet)
# Choice of the best model: start from the full multinomial logistic
# regression (label ~ all seven soil/climate predictors) on the training set.
# NOTE(review): the trace below reports "stopped after 100 iterations" — the
# optimizer hit nnet's default iteration cap (maxit = 100) before converging;
# consider maxit = 1000 so the deviances/AICs used downstream by step() come
# from converged fits.
multinom_model <- multinom(label ~ ., data = data.train)
## # weights: 198 (168 variable)
## initial value 5100.220048
## iter 10 value 2242.817606
## iter 20 value 1896.131609
## iter 30 value 1771.316057
## iter 40 value 1678.332182
## iter 50 value 1625.019326
## iter 60 value 1545.452929
## iter 70 value 1464.936340
## iter 80 value 1450.719204
## iter 90 value 1431.159839
## iter 100 value 1405.117234
## final value 1405.117234
## stopped after 100 iterations
# Apply stepwise model selection with step(): direction = "both" tries both
# dropping and re-adding predictors at each step, keeping the move that most
# reduces the AIC, until no single change improves it.
step_model <- step(multinom_model, direction = "both")
## Start: AIC=3146.23
## label ~ N + P + K + temperature + humidity + ph + rainfall
##
## trying - N
## # weights: 176 (147 variable)
## initial value 5100.220048
## iter 10 value 2145.294392
## iter 20 value 1708.270225
## iter 30 value 1466.838836
## iter 40 value 1320.280220
## iter 50 value 1221.599591
## iter 60 value 1160.052019
## iter 70 value 1120.819664
## iter 80 value 1077.132929
## iter 90 value 1032.310719
## iter 100 value 795.099039
## final value 795.099039
## stopped after 100 iterations
## trying - P
## # weights: 176 (147 variable)
## initial value 5100.220048
## iter 10 value 2755.005944
## iter 20 value 2295.412945
## iter 30 value 2086.522476
## iter 40 value 1958.583285
## iter 50 value 1882.769502
## iter 60 value 1751.721139
## iter 70 value 1695.218232
## iter 80 value 1664.584301
## iter 90 value 1635.959032
## iter 100 value 1549.823547
## final value 1549.823547
## stopped after 100 iterations
## trying - K
## # weights: 176 (147 variable)
## initial value 5100.220048
## iter 10 value 2953.864577
## iter 20 value 2499.916765
## iter 30 value 2233.269757
## iter 40 value 2098.722810
## iter 50 value 2026.858265
## iter 60 value 1919.240501
## iter 70 value 1837.084234
## iter 80 value 1795.909098
## iter 90 value 1753.303335
## iter 100 value 1642.087770
## final value 1642.087770
## stopped after 100 iterations
## trying - temperature
## # weights: 176 (147 variable)
## initial value 5100.220048
## iter 10 value 2313.786771
## iter 20 value 2069.616910
## iter 30 value 2047.172995
## iter 40 value 1940.186706
## iter 50 value 1870.998303
## iter 60 value 1842.589660
## iter 70 value 1813.771477
## iter 80 value 1779.932977
## iter 90 value 1746.869181
## iter 100 value 1663.126621
## final value 1663.126621
## stopped after 100 iterations
## trying - humidity
## # weights: 176 (147 variable)
## initial value 5100.220048
## iter 10 value 3319.213122
## iter 20 value 2702.599140
## iter 30 value 2368.662821
## iter 40 value 2205.863473
## iter 50 value 2106.760031
## iter 60 value 1969.583018
## iter 70 value 1912.418326
## iter 80 value 1876.636381
## iter 90 value 1825.985885
## iter 100 value 1633.805698
## final value 1633.805698
## stopped after 100 iterations
## trying - ph
## # weights: 176 (147 variable)
## initial value 5100.220048
## iter 10 value 2246.992054
## iter 20 value 1901.274612
## iter 30 value 1784.880828
## iter 40 value 1715.027657
## iter 50 value 1702.153538
## iter 60 value 1697.460170
## iter 70 value 1657.459225
## iter 80 value 1504.373900
## iter 90 value 1368.766239
## iter 100 value 1129.500164
## final value 1129.500164
## stopped after 100 iterations
## trying - rainfall
## # weights: 176 (147 variable)
## initial value 5100.220048
## iter 10 value 2471.156802
## iter 20 value 1975.457863
## iter 30 value 1754.384909
## iter 40 value 1633.232092
## iter 50 value 1559.741984
## iter 60 value 1517.823610
## iter 70 value 1474.354477
## iter 80 value 1432.281877
## iter 90 value 1359.603520
## iter 100 value 1215.528758
## final value 1215.528758
## stopped after 100 iterations
## Df AIC
## - N 147 1884.198
## - ph 147 2553.000
## - rainfall 147 2725.058
## <none> 168 3146.234
## - P 147 3393.647
## - humidity 147 3561.611
## - K 147 3578.176
## - temperature 147 3620.253
## # weights: 176 (147 variable)
## initial value 5100.220048
## iter 10 value 2145.294392
## iter 20 value 1708.270225
## iter 30 value 1466.838836
## iter 40 value 1320.280220
## iter 50 value 1221.599591
## iter 60 value 1160.052019
## iter 70 value 1120.819664
## iter 80 value 1077.132929
## iter 90 value 1032.310719
## iter 100 value 795.099039
## final value 795.099039
## stopped after 100 iterations
##
## Step: AIC=1884.2
## label ~ P + K + temperature + humidity + ph + rainfall
##
## trying - P
## # weights: 154 (126 variable)
## initial value 5100.220048
## iter 10 value 3529.361739
## iter 20 value 3282.543592
## iter 30 value 2968.977387
## iter 40 value 2828.043309
## iter 50 value 2707.710779
## iter 60 value 2450.671026
## iter 70 value 2399.260970
## iter 80 value 2330.475573
## iter 90 value 1968.479157
## iter 100 value 1213.566150
## final value 1213.566150
## stopped after 100 iterations
## trying - K
## # weights: 154 (126 variable)
## initial value 5100.220048
## iter 10 value 3549.129776
## iter 20 value 3223.716472
## iter 30 value 3048.173607
## iter 40 value 2832.897424
## iter 50 value 2682.132567
## iter 60 value 2483.672472
## iter 70 value 2428.369633
## iter 80 value 2327.514191
## iter 90 value 1893.056613
## iter 100 value 1199.718437
## final value 1199.718437
## stopped after 100 iterations
## trying - temperature
## # weights: 154 (126 variable)
## initial value 5100.220048
## iter 10 value 2227.590896
## iter 20 value 1813.347093
## iter 30 value 1700.137896
## iter 40 value 1472.572606
## iter 50 value 1378.288347
## iter 60 value 1343.864046
## iter 70 value 1317.223588
## iter 80 value 1276.890276
## iter 90 value 1156.999285
## iter 100 value 760.299060
## final value 760.299060
## stopped after 100 iterations
## trying - humidity
## # weights: 154 (126 variable)
## initial value 5100.220048
## iter 10 value 3418.354637
## iter 20 value 2679.159644
## iter 30 value 2287.710825
## iter 40 value 2063.124471
## iter 50 value 1842.525583
## iter 60 value 1783.502698
## iter 70 value 1715.584205
## iter 80 value 1656.463273
## iter 90 value 1403.809533
## iter 100 value 998.649246
## final value 998.649246
## stopped after 100 iterations
## trying - ph
## # weights: 154 (126 variable)
## initial value 5100.220048
## iter 10 value 2150.249038
## iter 20 value 1716.601078
## iter 30 value 1476.188324
## iter 40 value 1364.438057
## iter 50 value 1341.541959
## iter 60 value 1330.822546
## iter 70 value 1181.485860
## iter 80 value 874.283954
## iter 90 value 840.467860
## iter 100 value 667.470952
## final value 667.470952
## stopped after 100 iterations
## trying - rainfall
## # weights: 154 (126 variable)
## initial value 5100.220048
## iter 10 value 2618.705898
## iter 20 value 2311.332897
## iter 30 value 2142.181101
## iter 40 value 1999.738030
## iter 50 value 1884.493599
## iter 60 value 1843.738960
## iter 70 value 1802.586359
## iter 80 value 1684.187838
## iter 90 value 1328.328832
## iter 100 value 935.142524
## final value 935.142524
## stopped after 100 iterations
## trying + N
## # weights: 198 (168 variable)
## initial value 5100.220048
## iter 10 value 2242.817606
## iter 20 value 1896.131609
## iter 30 value 1771.316057
## iter 40 value 1678.332182
## iter 50 value 1625.019326
## iter 60 value 1545.452929
## iter 70 value 1464.936340
## iter 80 value 1450.719204
## iter 90 value 1431.159839
## iter 100 value 1405.117234
## final value 1405.117234
## stopped after 100 iterations
## Df AIC
## - ph 126 1586.942
## - temperature 126 1772.598
## <none> 147 1884.198
## - rainfall 126 2122.285
## - humidity 126 2249.298
## - K 126 2651.437
## - P 126 2679.132
## + +N 168 3146.234
## # weights: 154 (126 variable)
## initial value 5100.220048
## iter 10 value 2150.249038
## iter 20 value 1716.601078
## iter 30 value 1476.188324
## iter 40 value 1364.438057
## iter 50 value 1341.541959
## iter 60 value 1330.822546
## iter 70 value 1181.485860
## iter 80 value 874.283954
## iter 90 value 840.467860
## iter 100 value 667.470952
## final value 667.470952
## stopped after 100 iterations
##
## Step: AIC=1586.94
## label ~ P + K + temperature + humidity + rainfall
##
## trying - P
## # weights: 132 (105 variable)
## initial value 5100.220048
## iter 10 value 3529.445277
## iter 20 value 3289.605769
## iter 30 value 2979.276449
## iter 40 value 2847.285440
## iter 50 value 2814.601648
## iter 60 value 2800.852197
## iter 70 value 2607.781607
## iter 80 value 2200.454540
## iter 90 value 952.372069
## iter 100 value 278.008016
## final value 278.008016
## stopped after 100 iterations
## trying - K
## # weights: 132 (105 variable)
## initial value 5100.220048
## iter 10 value 3550.863883
## iter 20 value 3199.362612
## iter 30 value 3068.226477
## iter 40 value 2996.017137
## iter 50 value 2969.033424
## iter 60 value 2937.453904
## iter 70 value 2912.617919
## iter 80 value 2352.261389
## iter 90 value 945.804177
## iter 100 value 336.463278
## final value 336.463278
## stopped after 100 iterations
## trying - temperature
## # weights: 132 (105 variable)
## initial value 5100.220048
## iter 10 value 2232.844501
## iter 20 value 1828.531825
## iter 30 value 1722.737516
## iter 40 value 1696.933657
## iter 50 value 1674.820352
## iter 60 value 1505.446456
## iter 70 value 1378.319254
## iter 80 value 1161.807762
## iter 90 value 701.530404
## iter 100 value 183.038829
## final value 183.038829
## stopped after 100 iterations
## trying - humidity
## # weights: 132 (105 variable)
## initial value 5100.220048
## iter 10 value 3443.659742
## iter 20 value 2719.210012
## iter 30 value 2373.260955
## iter 40 value 2239.309592
## iter 50 value 2224.188332
## iter 60 value 1917.859252
## iter 70 value 1670.323789
## iter 80 value 1473.086974
## iter 90 value 1028.438604
## iter 100 value 636.301972
## final value 636.301972
## stopped after 100 iterations
## trying - rainfall
## # weights: 132 (105 variable)
## initial value 5100.220048
## iter 10 value 2630.137885
## iter 20 value 2329.801466
## iter 30 value 2185.001818
## iter 40 value 2129.612052
## iter 50 value 2112.762779
## iter 60 value 1988.585511
## iter 70 value 1621.559941
## iter 80 value 1340.451553
## iter 90 value 785.432688
## iter 100 value 424.324777
## final value 424.324777
## stopped after 100 iterations
## trying + N
## # weights: 176 (147 variable)
## initial value 5100.220048
## iter 10 value 2246.992054
## iter 20 value 1901.274612
## iter 30 value 1784.880828
## iter 40 value 1715.027657
## iter 50 value 1702.153538
## iter 60 value 1697.460170
## iter 70 value 1657.459225
## iter 80 value 1504.373900
## iter 90 value 1368.766164
## iter 100 value 1129.499963
## final value 1129.499963
## stopped after 100 iterations
## trying + ph
## # weights: 176 (147 variable)
## initial value 5100.220048
## iter 10 value 2145.294392
## iter 20 value 1708.270225
## iter 30 value 1466.838836
## iter 40 value 1320.280220
## iter 50 value 1221.599591
## iter 60 value 1160.052019
## iter 70 value 1120.819664
## iter 80 value 1077.132929
## iter 90 value 1032.310719
## iter 100 value 795.099038
## final value 795.099038
## stopped after 100 iterations
## Df AIC
## - temperature 105 576.0777
## - P 105 766.0160
## - K 105 882.9266
## - rainfall 105 1058.6496
## - humidity 105 1482.6039
## <none> 126 1586.9419
## + +ph 147 1884.1981
## + +N 147 2552.9999
## # weights: 132 (105 variable)
## initial value 5100.220048
## iter 10 value 2232.844501
## iter 20 value 1828.531825
## iter 30 value 1722.737516
## iter 40 value 1696.933657
## iter 50 value 1674.820352
## iter 60 value 1505.446456
## iter 70 value 1378.319254
## iter 80 value 1161.807762
## iter 90 value 701.530404
## iter 100 value 183.038829
## final value 183.038829
## stopped after 100 iterations
##
## Step: AIC=576.08
## label ~ P + K + humidity + rainfall
##
## trying - P
## # weights: 110 (84 variable)
## initial value 5100.220048
## iter 10 value 3561.403465
## iter 20 value 3471.927110
## iter 30 value 3452.005350
## iter 40 value 3433.299848
## iter 50 value 3291.273109
## iter 60 value 2892.345910
## iter 70 value 1779.250104
## iter 80 value 477.547746
## iter 90 value 368.694288
## iter 100 value 340.164770
## final value 340.164770
## stopped after 100 iterations
## trying - K
## # weights: 110 (84 variable)
## initial value 5100.220048
## iter 10 value 3639.985248
## iter 20 value 3448.430755
## iter 30 value 3394.364505
## iter 40 value 3385.109203
## iter 50 value 2792.866772
## iter 60 value 2556.472329
## iter 70 value 1079.407527
## iter 80 value 358.789813
## iter 90 value 307.465328
## iter 100 value 284.777551
## final value 284.777551
## stopped after 100 iterations
## trying - humidity
## # weights: 110 (84 variable)
## initial value 5100.220048
## iter 10 value 3816.622618
## iter 20 value 3732.813441
## iter 30 value 3724.180611
## iter 40 value 3705.323148
## iter 50 value 3334.726698
## iter 60 value 3129.246311
## iter 70 value 2203.246185
## iter 80 value 1133.609915
## iter 90 value 895.176856
## iter 100 value 812.433230
## final value 812.433230
## stopped after 100 iterations
## trying - rainfall
## # weights: 110 (84 variable)
## initial value 5100.220048
## iter 10 value 2821.121306
## iter 20 value 2675.216507
## iter 30 value 2649.074717
## iter 40 value 2622.124422
## iter 50 value 2258.998142
## iter 60 value 2061.397270
## iter 70 value 1516.123395
## iter 80 value 701.593173
## iter 90 value 611.501933
## iter 100 value 582.762256
## final value 582.762256
## stopped after 100 iterations
## trying + N
## # weights: 154 (126 variable)
## initial value 5100.220048
## iter 10 value 2318.096281
## iter 20 value 2082.389962
## iter 30 value 2070.580691
## iter 40 value 2061.586596
## iter 50 value 2055.669856
## iter 60 value 2008.528143
## iter 70 value 1579.701246
## iter 80 value 1555.262236
## iter 90 value 1338.257999
## iter 100 value 959.796814
## final value 959.796814
## stopped after 100 iterations
## trying + temperature
## # weights: 154 (126 variable)
## initial value 5100.220048
## iter 10 value 2150.249038
## iter 20 value 1716.601078
## iter 30 value 1476.188324
## iter 40 value 1364.438057
## iter 50 value 1341.541959
## iter 60 value 1330.822546
## iter 70 value 1181.485860
## iter 80 value 874.283953
## iter 90 value 840.467860
## iter 100 value 667.470958
## final value 667.470958
## stopped after 100 iterations
## trying + ph
## # weights: 154 (126 variable)
## initial value 5100.220048
## iter 10 value 2227.590896
## iter 20 value 1813.347093
## iter 30 value 1700.137896
## iter 40 value 1472.572606
## iter 50 value 1378.288347
## iter 60 value 1343.864046
## iter 70 value 1317.223588
## iter 80 value 1276.890273
## iter 90 value 1156.999083
## iter 100 value 760.298206
## final value 760.298206
## stopped after 100 iterations
## Df AIC
## <none> 105 576.0777
## - K 84 737.5551
## - P 84 848.3295
## - rainfall 84 1333.5245
## + +temperature 126 1586.9419
## + +ph 126 1772.5964
## - humidity 84 1792.8665
## + +N 126 2171.5936
# Display a summary of the final model retained after stepwise selection:
# coefficients and standard errors per class for label ~ P + K + humidity + rainfall.
summary(step_model)
## Call:
## multinom(formula = label ~ P + K + humidity + rainfall, data = data.train)
##
## Coefficients:
## (Intercept) P K humidity rainfall
## banana -69.569590 0.68550437 -0.64121449 1.09756922 -0.15547273
## blackgram 4.304603 0.59197584 -0.73814865 0.38141142 -0.29779770
## chickpea 85.857103 -0.53628711 0.31482158 -0.64844492 -0.31781615
## coconut -122.394119 -0.74087098 -0.54923735 2.05108719 0.04843387
## coffee 65.501066 -0.82484420 0.05044068 -0.11001263 -0.12260407
## cotton -15.643753 -0.03146854 -0.68701580 1.06776013 -0.22993235
## grapes 22.607961 -0.86959824 0.96565442 0.25611495 -1.53408537
## jute -9.222821 -0.12132592 -0.24733467 0.72698822 -0.11784652
## kidneybeans 42.128473 0.41152988 -0.31406305 -0.68771148 -0.15759945
## lentil 32.051281 0.69606296 -0.66315362 0.38463301 -0.94077923
## maize 44.283640 -0.07491802 -0.73507377 0.24370814 -0.18745151
## mango 89.396674 -0.90708986 0.28139659 -0.28154710 -0.29139217
## mothbeans 88.840132 -0.16448029 -0.15843198 -0.17870011 -0.57623817
## mungbean -23.049325 0.04842540 -0.90141795 1.43969271 -0.60314574
## muskmelon 5.025637 -0.56780129 0.15319815 0.92958678 -0.66414162
## orange -28.363184 -0.62236028 -1.11031540 1.54394884 -0.27269375
## papaya -157.154502 0.22500275 -0.42514871 2.30055417 -0.11681490
## pigeonpeas -10.829813 0.73889187 -0.74948922 0.08807975 -0.03254383
## pomegranate -21.602185 -0.66138045 0.05631900 1.01507330 -0.16711459
## rice -53.512323 -0.15084286 -0.49692113 1.07078690 0.02308844
## watermelon 63.100658 -0.75110609 0.21782070 0.14845885 -0.34537638
##
## Std. Errors:
## (Intercept) P K humidity rainfall
## banana 0.43930782 0.4581909 0.3870081 0.9072349 0.2773839
## blackgram 6.38377608 0.4457204 0.4045851 0.9064169 0.2783783
## chickpea 0.04023185 0.8410492 0.6405644 1.0950605 0.4322444
## coconut 0.04039392 0.4877052 0.4352699 0.9022431 0.2771137
## coffee 3.55970905 0.4458752 0.3828892 0.9032470 0.2760899
## cotton 2.96000851 0.4350137 0.3877767 0.9036950 0.2772180
## grapes 0.01283488 1.0721372 1.6717513 2.3082930 1.5744761
## jute 5.35343905 0.4247871 0.3710613 0.9024166 0.2755077
## kidneybeans 0.52518253 0.4587243 0.5241574 0.9127507 0.2805001
## lentil 3.80625809 0.4523549 0.4368496 0.9067764 0.2972084
## maize 5.48892342 0.4367704 0.3952955 0.9033529 0.2764861
## mango 3.31743266 0.4446593 0.3850433 0.9048456 0.2777515
## mothbeans 5.66781058 0.4402580 0.4190142 0.9034734 0.2816386
## mungbean 0.47818870 0.4400714 0.3965968 0.9060041 0.2885592
## muskmelon 0.01084407 0.4588559 0.4473388 0.9134011 0.3056316
## orange 0.02985024 0.5074100 0.7078151 0.9430702 0.3207375
## papaya 0.32618623 0.4290026 0.3664724 0.8994440 0.2756755
## pigeonpeas 2.72596146 0.4477546 0.4077518 0.9058731 0.2767771
## pomegranate 0.14822139 0.4636800 0.4131988 0.9036994 0.2775411
## rice 3.76941778 0.4267291 0.3788628 0.9017456 0.2760303
## watermelon 0.45941646 0.4484895 0.3443199 0.9004652 0.2784114
##
## Residual Deviance: 366.0777
## AIC: 576.0777
# Retrain the selected regression model (predictors kept by step()) with
# cross-validation via caret::train; ctrl is the trainControl object defined
# earlier in the document.
reg_model_final <- train(label ~ P + K + humidity + rainfall, data = data.train, method = "multinom", trControl = ctrl)
## # weights: 132 (105 variable)
## initial value 4596.380128
## iter 10 value 3427.539052
## iter 20 value 2811.691896
## iter 30 value 2499.849620
## iter 40 value 2404.901538
## iter 50 value 2382.968151
## iter 60 value 2353.647916
## iter 70 value 2169.956689
## iter 80 value 1717.536429
## iter 90 value 1133.433992
## iter 100 value 263.943309
## final value 263.943309
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4596.380128
## iter 10 value 3427.592422
## iter 20 value 2811.781279
## iter 30 value 2488.517454
## iter 40 value 2398.761480
## iter 50 value 2377.971646
## iter 60 value 2337.451996
## iter 70 value 2197.439215
## iter 80 value 1707.389260
## iter 90 value 1052.788322
## iter 100 value 535.430640
## final value 535.430640
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4596.380128
## iter 10 value 3427.539105
## iter 20 value 2811.691985
## iter 30 value 2499.851097
## iter 40 value 2404.903281
## iter 50 value 2382.972097
## iter 60 value 2353.652246
## iter 70 value 2170.054128
## iter 80 value 1721.313193
## iter 90 value 988.480069
## iter 100 value 245.758027
## final value 245.758027
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4584.015958
## iter 10 value 2864.435832
## iter 20 value 2311.548336
## iter 30 value 2122.714169
## iter 40 value 2075.822014
## iter 50 value 2056.118540
## iter 60 value 2017.912127
## iter 70 value 1741.992406
## iter 80 value 1396.327240
## iter 90 value 845.488637
## iter 100 value 195.103067
## final value 195.103067
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4584.015958
## iter 10 value 2864.462299
## iter 20 value 2311.585220
## iter 30 value 2122.797889
## iter 40 value 2075.987038
## iter 50 value 2056.311815
## iter 60 value 2018.832665
## iter 70 value 1775.303313
## iter 80 value 1484.887588
## iter 90 value 912.052432
## iter 100 value 468.972951
## final value 468.972951
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4584.015958
## iter 10 value 2864.435859
## iter 20 value 2311.548373
## iter 30 value 2122.714253
## iter 40 value 2075.822178
## iter 50 value 2056.118732
## iter 60 value 2017.912469
## iter 70 value 1742.045042
## iter 80 value 1396.450528
## iter 90 value 790.960024
## iter 100 value 185.503631
## final value 185.503631
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4599.471171
## iter 10 value 2103.266448
## iter 20 value 1864.210216
## iter 30 value 1791.763298
## iter 40 value 1776.676365
## iter 50 value 1736.904659
## iter 60 value 1286.704982
## iter 70 value 1259.705610
## iter 80 value 1050.878197
## iter 90 value 642.005671
## iter 100 value 156.431565
## final value 156.431565
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4599.471171
## iter 10 value 2103.292866
## iter 20 value 1864.257807
## iter 30 value 1791.825162
## iter 40 value 1776.747610
## iter 50 value 1737.185760
## iter 60 value 1323.545381
## iter 70 value 1302.109061
## iter 80 value 1123.685430
## iter 90 value 788.825905
## iter 100 value 425.570789
## final value 425.570789
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4599.471171
## iter 10 value 2103.266474
## iter 20 value 1864.210264
## iter 30 value 1791.763360
## iter 40 value 1776.676436
## iter 50 value 1736.904941
## iter 60 value 1286.745072
## iter 70 value 1259.751897
## iter 80 value 1050.957270
## iter 90 value 642.676375
## iter 100 value 166.155118
## final value 166.155118
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4593.289086
## iter 10 value 2092.098057
## iter 20 value 1820.824074
## iter 30 value 1723.262173
## iter 40 value 1707.867542
## iter 50 value 1689.450389
## iter 60 value 1366.875232
## iter 70 value 1269.824143
## iter 80 value 1109.854378
## iter 90 value 828.307142
## iter 100 value 164.630245
## final value 164.630245
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4593.289086
## iter 10 value 2092.137509
## iter 20 value 1820.903330
## iter 30 value 1723.379765
## iter 40 value 1707.960767
## iter 50 value 1688.973726
## iter 60 value 1437.375905
## iter 70 value 1375.818246
## iter 80 value 1188.085289
## iter 90 value 864.541122
## iter 100 value 431.752698
## final value 431.752698
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4593.289086
## iter 10 value 2092.098097
## iter 20 value 1820.824153
## iter 30 value 1723.262283
## iter 40 value 1707.867643
## iter 50 value 1689.450352
## iter 60 value 1366.920168
## iter 70 value 1269.875624
## iter 80 value 1109.223064
## iter 90 value 617.421948
## iter 100 value 166.454930
## final value 166.454930
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4580.924916
## iter 10 value 2108.204602
## iter 20 value 1785.722909
## iter 30 value 1727.609583
## iter 40 value 1716.429909
## iter 50 value 1694.887723
## iter 60 value 1401.339019
## iter 70 value 1329.498964
## iter 80 value 1150.391889
## iter 90 value 638.584596
## iter 100 value 163.651342
## final value 163.651342
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4580.924916
## iter 10 value 2108.231283
## iter 20 value 1785.781954
## iter 30 value 1727.697465
## iter 40 value 1716.522248
## iter 50 value 1695.054380
## iter 60 value 1423.155390
## iter 70 value 1355.771993
## iter 80 value 1210.793157
## iter 90 value 792.956195
## iter 100 value 436.185810
## final value 436.185810
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4580.924916
## iter 10 value 2108.204629
## iter 20 value 1785.722968
## iter 30 value 1727.609670
## iter 40 value 1716.430001
## iter 50 value 1694.887889
## iter 60 value 1401.361938
## iter 70 value 1329.503529
## iter 80 value 1150.698564
## iter 90 value 632.866481
## iter 100 value 173.371046
## final value 173.371046
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4587.107001
## iter 10 value 2116.965117
## iter 20 value 1904.049986
## iter 30 value 1812.232530
## iter 40 value 1790.309230
## iter 50 value 1759.496748
## iter 60 value 1455.247119
## iter 70 value 1408.312786
## iter 80 value 1134.891211
## iter 90 value 794.501222
## iter 100 value 168.057704
## final value 168.057704
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4587.107001
## iter 10 value 2117.003029
## iter 20 value 1904.091875
## iter 30 value 1812.273250
## iter 40 value 1790.363304
## iter 50 value 1759.537739
## iter 60 value 1487.623139
## iter 70 value 1446.153792
## iter 80 value 1208.369254
## iter 90 value 914.786073
## iter 100 value 451.112674
## final value 451.112674
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4587.107001
## iter 10 value 2116.965155
## iter 20 value 1904.050028
## iter 30 value 1812.232571
## iter 40 value 1790.309284
## iter 50 value 1759.496787
## iter 60 value 1455.262074
## iter 70 value 1408.327894
## iter 80 value 1135.016041
## iter 90 value 769.410994
## iter 100 value 171.183263
## final value 171.183263
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4602.562213
## iter 10 value 3385.423802
## iter 20 value 2786.773439
## iter 30 value 2336.565560
## iter 40 value 2248.591773
## iter 50 value 2227.105247
## iter 60 value 2169.882552
## iter 70 value 1817.681747
## iter 80 value 1712.423598
## iter 90 value 894.464153
## iter 100 value 225.983972
## final value 225.983972
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4602.562213
## iter 10 value 3385.477935
## iter 20 value 2786.849209
## iter 30 value 2336.680363
## iter 40 value 2249.186835
## iter 50 value 2227.711558
## iter 60 value 2170.838908
## iter 70 value 1813.276069
## iter 80 value 1735.209840
## iter 90 value 991.695102
## iter 100 value 522.981298
## final value 522.981298
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4602.562213
## iter 10 value 3385.423856
## iter 20 value 2786.773515
## iter 30 value 2336.565674
## iter 40 value 2248.592361
## iter 50 value 2227.105846
## iter 60 value 2169.883502
## iter 70 value 1817.662596
## iter 80 value 1712.306958
## iter 90 value 907.425914
## iter 100 value 230.238062
## final value 230.238062
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4568.560746
## iter 10 value 2119.630498
## iter 20 value 1940.036857
## iter 30 value 1859.966355
## iter 40 value 1835.521595
## iter 50 value 1815.812627
## iter 60 value 1480.621768
## iter 70 value 1367.938587
## iter 80 value 1131.111741
## iter 90 value 685.996783
## iter 100 value 160.457582
## final value 160.457582
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4568.560746
## iter 10 value 2119.666678
## iter 20 value 1940.078054
## iter 30 value 1860.066112
## iter 40 value 1835.120337
## iter 50 value 1815.733596
## iter 60 value 1547.318645
## iter 70 value 1471.826245
## iter 80 value 1206.175854
## iter 90 value 933.064183
## iter 100 value 446.970127
## final value 446.970127
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4568.560746
## iter 10 value 2119.630534
## iter 20 value 1940.036898
## iter 30 value 1859.966454
## iter 40 value 1835.521180
## iter 50 value 1815.812411
## iter 60 value 1480.602102
## iter 70 value 1367.870195
## iter 80 value 1130.573856
## iter 90 value 692.061463
## iter 100 value 165.455591
## final value 165.455591
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4593.289086
## iter 10 value 2119.908084
## iter 20 value 1824.533717
## iter 30 value 1750.143318
## iter 40 value 1738.825511
## iter 50 value 1692.455904
## iter 60 value 1369.802591
## iter 70 value 1323.236564
## iter 80 value 1107.869097
## iter 90 value 635.916111
## iter 100 value 150.688663
## final value 150.688663
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4593.289086
## iter 10 value 2119.931818
## iter 20 value 1824.566427
## iter 30 value 1750.196525
## iter 40 value 1738.881870
## iter 50 value 1692.834752
## iter 60 value 1398.347412
## iter 70 value 1363.523865
## iter 80 value 1187.771600
## iter 90 value 934.935031
## iter 100 value 441.095971
## final value 441.095971
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4593.289086
## iter 10 value 2119.908107
## iter 20 value 1824.533750
## iter 30 value 1750.143371
## iter 40 value 1738.825568
## iter 50 value 1692.456283
## iter 60 value 1369.832750
## iter 70 value 1323.265143
## iter 80 value 1107.761248
## iter 90 value 636.465667
## iter 100 value 162.039390
## final value 162.039390
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4596.380128
## iter 10 value 2112.995747
## iter 20 value 1750.896605
## iter 30 value 1679.196547
## iter 40 value 1667.209416
## iter 50 value 1646.181493
## iter 60 value 1445.638291
## iter 70 value 1363.417167
## iter 80 value 1166.289471
## iter 90 value 658.277347
## iter 100 value 165.070949
## final value 165.070949
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4596.380128
## iter 10 value 2113.023643
## iter 20 value 1750.941181
## iter 30 value 1679.240536
## iter 40 value 1667.255507
## iter 50 value 1646.476787
## iter 60 value 1461.556326
## iter 70 value 1383.628017
## iter 80 value 1213.420555
## iter 90 value 843.540422
## iter 100 value 445.339300
## final value 445.339300
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 4596.380128
## iter 10 value 2112.995775
## iter 20 value 1750.896650
## iter 30 value 1679.196591
## iter 40 value 1667.209462
## iter 50 value 1646.181788
## iter 60 value 1445.655204
## iter 70 value 1363.423608
## iter 80 value 1166.399792
## iter 90 value 657.096225
## iter 100 value 174.756401
## final value 174.756401
## stopped after 100 iterations
## # weights: 132 (105 variable)
## initial value 5100.220048
## iter 10 value 2232.844501
## iter 20 value 1828.531825
## iter 30 value 1722.737516
## iter 40 value 1696.933657
## iter 50 value 1674.820352
## iter 60 value 1505.446456
## iter 70 value 1378.319254
## iter 80 value 1161.807762
## iter 90 value 701.530404
## iter 100 value 183.038829
## final value 183.038829
## stopped after 100 iterations
# Predict crop labels on the held-out test set with the final
# multinomial regression model
pred_reg <- predict(reg_model_final, newdata = data.test)
# Confusion matrix + accuracy: compute once and print the stored
# object, instead of calling confusionMatrix() twice on the same inputs
conf_matrix <- confusionMatrix(pred_reg, data.test$label)
print(conf_matrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction apple banana blackgram chickpea coconut coffee cotton grapes jute
## apple 25 0 0 0 0 0 0 1 0
## banana 0 24 0 0 0 0 0 0 0
## blackgram 0 0 22 0 0 0 0 0 0
## chickpea 0 0 0 25 0 0 0 0 0
## coconut 0 0 0 0 25 0 0 0 0
## coffee 0 0 0 0 0 25 0 0 0
## cotton 0 0 0 0 0 0 24 0 0
## grapes 0 0 0 0 0 0 0 24 0
## jute 0 1 0 0 0 0 0 0 19
## kidneybeans 0 0 0 0 0 0 0 0 0
## lentil 0 0 0 0 0 0 0 0 0
## maize 0 0 3 0 0 0 0 0 0
## mango 0 0 0 0 0 0 0 0 0
## mothbeans 0 0 0 0 0 0 0 0 0
## mungbean 0 0 0 0 0 0 1 0 0
## muskmelon 0 0 0 0 0 0 0 0 0
## orange 0 0 0 0 0 0 0 0 0
## papaya 0 0 0 0 0 0 0 0 4
## pigeonpeas 0 0 0 0 0 0 0 0 0
## pomegranate 0 0 0 0 0 0 0 0 0
## rice 0 0 0 0 0 0 0 0 2
## watermelon 0 0 0 0 0 0 0 0 0
## Reference
## Prediction kidneybeans lentil maize mango mothbeans mungbean muskmelon
## apple 0 0 0 0 0 0 0
## banana 0 0 0 0 0 0 0
## blackgram 0 0 3 0 0 0 0
## chickpea 0 0 0 0 0 0 0
## coconut 0 0 0 0 0 0 0
## coffee 0 0 0 0 0 0 0
## cotton 0 0 1 0 0 1 0
## grapes 0 0 0 0 0 0 0
## jute 0 0 0 0 0 0 0
## kidneybeans 23 0 0 0 0 0 0
## lentil 0 23 0 0 3 0 0
## maize 0 0 16 0 4 0 0
## mango 0 0 0 25 0 0 0
## mothbeans 1 2 5 0 18 0 0
## mungbean 0 0 0 0 0 24 0
## muskmelon 0 0 0 0 0 0 25
## orange 0 0 0 0 0 0 0
## papaya 0 0 0 0 0 0 0
## pigeonpeas 1 0 0 0 0 0 0
## pomegranate 0 0 0 0 0 0 0
## rice 0 0 0 0 0 0 0
## watermelon 0 0 0 0 0 0 0
## Reference
## Prediction orange papaya pigeonpeas pomegranate rice watermelon
## apple 0 0 0 0 0 0
## banana 0 0 0 0 0 0
## blackgram 0 0 0 0 0 0
## chickpea 0 0 0 0 0 0
## coconut 0 0 0 0 0 0
## coffee 0 0 0 0 0 0
## cotton 0 0 0 0 0 0
## grapes 0 0 0 0 0 0
## jute 0 0 0 0 2 0
## kidneybeans 0 0 1 0 0 0
## lentil 0 0 0 0 0 0
## maize 0 0 0 0 0 0
## mango 0 0 0 0 0 0
## mothbeans 0 0 0 0 0 0
## mungbean 0 0 0 0 0 0
## muskmelon 0 1 0 0 0 0
## orange 25 0 0 0 0 0
## papaya 0 24 0 0 0 0
## pigeonpeas 0 0 24 0 0 0
## pomegranate 0 0 0 25 0 0
## rice 0 0 0 0 23 0
## watermelon 0 0 0 0 0 25
##
## Overall Statistics
##
## Accuracy : 0.9327
## 95% CI : (0.9085, 0.9522)
## No Information Rate : 0.0455
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9295
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: apple Class: banana Class: blackgram
## Sensitivity 1.00000 0.96000 0.88000
## Specificity 0.99810 1.00000 0.99429
## Pos Pred Value 0.96154 1.00000 0.88000
## Neg Pred Value 1.00000 0.99810 0.99429
## Prevalence 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.04364 0.04000
## Detection Prevalence 0.04727 0.04364 0.04545
## Balanced Accuracy 0.99905 0.98000 0.93714
## Class: chickpea Class: coconut Class: coffee Class: cotton
## Sensitivity 1.00000 1.00000 1.00000 0.96000
## Specificity 1.00000 1.00000 1.00000 0.99619
## Pos Pred Value 1.00000 1.00000 1.00000 0.92308
## Neg Pred Value 1.00000 1.00000 1.00000 0.99809
## Prevalence 0.04545 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.04545 0.04545 0.04364
## Detection Prevalence 0.04545 0.04545 0.04545 0.04727
## Balanced Accuracy 1.00000 1.00000 1.00000 0.97810
## Class: grapes Class: jute Class: kidneybeans Class: lentil
## Sensitivity 0.96000 0.76000 0.92000 0.92000
## Specificity 1.00000 0.99429 0.99810 0.99429
## Pos Pred Value 1.00000 0.86364 0.95833 0.88462
## Neg Pred Value 0.99810 0.98864 0.99620 0.99618
## Prevalence 0.04545 0.04545 0.04545 0.04545
## Detection Rate 0.04364 0.03455 0.04182 0.04182
## Detection Prevalence 0.04364 0.04000 0.04364 0.04727
## Balanced Accuracy 0.98000 0.87714 0.95905 0.95714
## Class: maize Class: mango Class: mothbeans Class: mungbean
## Sensitivity 0.64000 1.00000 0.72000 0.96000
## Specificity 0.98667 1.00000 0.98476 0.99810
## Pos Pred Value 0.69565 1.00000 0.69231 0.96000
## Neg Pred Value 0.98292 1.00000 0.98664 0.99810
## Prevalence 0.04545 0.04545 0.04545 0.04545
## Detection Rate 0.02909 0.04545 0.03273 0.04364
## Detection Prevalence 0.04182 0.04545 0.04727 0.04545
## Balanced Accuracy 0.81333 1.00000 0.85238 0.97905
## Class: muskmelon Class: orange Class: papaya
## Sensitivity 1.00000 1.00000 0.96000
## Specificity 0.99810 1.00000 0.99238
## Pos Pred Value 0.96154 1.00000 0.85714
## Neg Pred Value 1.00000 1.00000 0.99808
## Prevalence 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.04545 0.04364
## Detection Prevalence 0.04727 0.04545 0.05091
## Balanced Accuracy 0.99905 1.00000 0.97619
## Class: pigeonpeas Class: pomegranate Class: rice
## Sensitivity 0.96000 1.00000 0.92000
## Specificity 0.99810 1.00000 0.99619
## Pos Pred Value 0.96000 1.00000 0.92000
## Neg Pred Value 0.99810 1.00000 0.99619
## Prevalence 0.04545 0.04545 0.04545
## Detection Rate 0.04364 0.04545 0.04182
## Detection Prevalence 0.04545 0.04545 0.04545
## Balanced Accuracy 0.97905 1.00000 0.95810
## Class: watermelon
## Sensitivity 1.00000
## Specificity 1.00000
## Pos Pred Value 1.00000
## Neg Pred Value 1.00000
## Prevalence 0.04545
## Detection Rate 0.04545
## Detection Prevalence 0.04545
## Balanced Accuracy 1.00000
# Report the overall accuracy stored in the confusion-matrix summary
accuracy <- conf_matrix$overall["Accuracy"]
print(paste("Accuracy: ", accuracy))
## [1] "Accuracy: 0.932727272727273"
Le modèle KNN est moins sensible aux corrélations entre les variables par rapport à la régression logistique. Cependant, si deux variables corrélées sont incluses dans le calcul des distances, cela peut biaiser la notion de proximité, car ces deux variables auront un poids plus important. Pour évaluer l’impact de cette redondance sur la performance de notre modèle KNN, nous allons procéder à l’élimination successive de la variable Phosphore en faveur du Potassium, puis de Potassium en faveur de Phosphore. Cette approche nous permettra de déterminer si la suppression de l’une de ces variables corrélées améliore la performance du modèle et réduit l’effet négatif d’une information redondante sur la précision des prédictions.
## Drop the phosphorus variable (P, column 2) and refit the KNN model
data.trainP <- data.train[,-2]
data.testP <- data.test[,-2]
tuneGrid <- expand.grid(k = 1:20)  # candidate values of k (1 to 20)
set.seed(123)
# Train KNN with cross-validation on the reduced predictor set
knn_model_P <- train(label ~ .,
                     data = data.trainP,
                     method = "knn",
                     tuneGrid = tuneGrid,
                     trControl = ctrl)
# Best k selected by resampling
best_k <- knn_model_P$bestTune
print(best_k)
## k
## 1 1
# Predict on the matching test set
pred_knn_P <- predict(knn_model_P, newdata = data.testP)
# Confusion matrix: compute once, then print (avoids running
# confusionMatrix() twice on the same inputs)
conf_matrix_P <- confusionMatrix(pred_knn_P, data.testP$label)
print(conf_matrix_P)
## Confusion Matrix and Statistics
##
## Reference
## Prediction apple banana blackgram chickpea coconut coffee cotton grapes jute
## apple 25 0 0 0 0 0 0 0 0
## banana 0 25 0 0 0 0 0 0 0
## blackgram 0 0 23 0 0 0 0 0 0
## chickpea 0 0 0 25 0 0 0 0 0
## coconut 0 0 0 0 25 0 0 0 0
## coffee 0 0 0 0 0 25 0 0 0
## cotton 0 0 0 0 0 0 24 0 0
## grapes 0 0 0 0 0 0 0 25 0
## jute 0 0 0 0 0 0 0 0 22
## kidneybeans 0 0 0 0 0 0 0 0 0
## lentil 0 0 0 0 0 0 0 0 0
## maize 0 0 0 0 0 0 1 0 0
## mango 0 0 0 0 0 0 0 0 0
## mothbeans 0 0 2 0 0 0 0 0 0
## mungbean 0 0 0 0 0 0 0 0 0
## muskmelon 0 0 0 0 0 0 0 0 0
## orange 0 0 0 0 0 0 0 0 0
## papaya 0 0 0 0 0 0 0 0 0
## pigeonpeas 0 0 0 0 0 0 0 0 0
## pomegranate 0 0 0 0 0 0 0 0 0
## rice 0 0 0 0 0 0 0 0 3
## watermelon 0 0 0 0 0 0 0 0 0
## Reference
## Prediction kidneybeans lentil maize mango mothbeans mungbean muskmelon
## apple 0 0 0 0 0 0 0
## banana 0 0 0 0 0 0 0
## blackgram 0 0 1 0 2 0 0
## chickpea 0 0 0 0 0 0 0
## coconut 0 0 0 0 0 0 0
## coffee 0 0 0 0 0 0 0
## cotton 0 0 0 0 0 0 0
## grapes 0 0 0 0 0 0 0
## jute 0 0 0 0 0 0 0
## kidneybeans 25 0 0 0 0 0 0
## lentil 0 22 0 0 3 0 0
## maize 0 0 24 0 0 0 0
## mango 0 0 0 24 0 0 0
## mothbeans 0 3 0 0 20 0 0
## mungbean 0 0 0 0 0 25 0
## muskmelon 0 0 0 0 0 0 25
## orange 0 0 0 0 0 0 0
## papaya 0 0 0 0 0 0 0
## pigeonpeas 0 0 0 1 0 0 0
## pomegranate 0 0 0 0 0 0 0
## rice 0 0 0 0 0 0 0
## watermelon 0 0 0 0 0 0 0
## Reference
## Prediction orange papaya pigeonpeas pomegranate rice watermelon
## apple 0 0 0 0 0 0
## banana 0 0 0 0 0 0
## blackgram 0 0 0 0 0 0
## chickpea 0 0 0 0 0 0
## coconut 0 0 0 0 0 0
## coffee 0 0 0 0 0 0
## cotton 0 0 0 0 0 0
## grapes 0 0 0 0 0 0
## jute 0 0 0 0 0 0
## kidneybeans 0 0 0 0 0 0
## lentil 0 0 0 0 0 0
## maize 0 0 0 0 0 0
## mango 0 0 3 0 0 0
## mothbeans 0 0 0 0 0 0
## mungbean 0 0 0 0 0 0
## muskmelon 0 0 0 0 0 0
## orange 25 0 0 0 0 0
## papaya 0 25 0 0 0 0
## pigeonpeas 0 0 22 0 0 0
## pomegranate 0 0 0 25 0 0
## rice 0 0 0 0 25 0
## watermelon 0 0 0 0 0 25
##
## Overall Statistics
##
## Accuracy : 0.9655
## 95% CI : (0.9466, 0.9791)
## No Information Rate : 0.0455
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9638
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: apple Class: banana Class: blackgram
## Sensitivity 1.00000 1.00000 0.92000
## Specificity 1.00000 1.00000 0.99429
## Pos Pred Value 1.00000 1.00000 0.88462
## Neg Pred Value 1.00000 1.00000 0.99618
## Prevalence 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.04545 0.04182
## Detection Prevalence 0.04545 0.04545 0.04727
## Balanced Accuracy 1.00000 1.00000 0.95714
## Class: chickpea Class: coconut Class: coffee Class: cotton
## Sensitivity 1.00000 1.00000 1.00000 0.96000
## Specificity 1.00000 1.00000 1.00000 1.00000
## Pos Pred Value 1.00000 1.00000 1.00000 1.00000
## Neg Pred Value 1.00000 1.00000 1.00000 0.99810
## Prevalence 0.04545 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.04545 0.04545 0.04364
## Detection Prevalence 0.04545 0.04545 0.04545 0.04364
## Balanced Accuracy 1.00000 1.00000 1.00000 0.98000
## Class: grapes Class: jute Class: kidneybeans Class: lentil
## Sensitivity 1.00000 0.88000 1.00000 0.88000
## Specificity 1.00000 1.00000 1.00000 0.99429
## Pos Pred Value 1.00000 1.00000 1.00000 0.88000
## Neg Pred Value 1.00000 0.99432 1.00000 0.99429
## Prevalence 0.04545 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.04000 0.04545 0.04000
## Detection Prevalence 0.04545 0.04000 0.04545 0.04545
## Balanced Accuracy 1.00000 0.94000 1.00000 0.93714
## Class: maize Class: mango Class: mothbeans Class: mungbean
## Sensitivity 0.96000 0.96000 0.80000 1.00000
## Specificity 0.99810 0.99429 0.99048 1.00000
## Pos Pred Value 0.96000 0.88889 0.80000 1.00000
## Neg Pred Value 0.99810 0.99809 0.99048 1.00000
## Prevalence 0.04545 0.04545 0.04545 0.04545
## Detection Rate 0.04364 0.04364 0.03636 0.04545
## Detection Prevalence 0.04545 0.04909 0.04545 0.04545
## Balanced Accuracy 0.97905 0.97714 0.89524 1.00000
## Class: muskmelon Class: orange Class: papaya
## Sensitivity 1.00000 1.00000 1.00000
## Specificity 1.00000 1.00000 1.00000
## Pos Pred Value 1.00000 1.00000 1.00000
## Neg Pred Value 1.00000 1.00000 1.00000
## Prevalence 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.04545 0.04545
## Detection Prevalence 0.04545 0.04545 0.04545
## Balanced Accuracy 1.00000 1.00000 1.00000
## Class: pigeonpeas Class: pomegranate Class: rice
## Sensitivity 0.88000 1.00000 1.00000
## Specificity 0.99810 1.00000 0.99429
## Pos Pred Value 0.95652 1.00000 0.89286
## Neg Pred Value 0.99431 1.00000 1.00000
## Prevalence 0.04545 0.04545 0.04545
## Detection Rate 0.04000 0.04545 0.04545
## Detection Prevalence 0.04182 0.04545 0.05091
## Balanced Accuracy 0.93905 1.00000 0.99714
## Class: watermelon
## Sensitivity 1.00000
## Specificity 1.00000
## Pos Pred Value 1.00000
## Neg Pred Value 1.00000
## Prevalence 0.04545
## Detection Rate 0.04545
## Detection Prevalence 0.04545
## Balanced Accuracy 1.00000
# Overall accuracy of the KNN model fitted without the P variable
accuracy_P <- conf_matrix_P$overall["Accuracy"]
print(paste("Accuracy: ", accuracy_P)) ## accuracy = 0.97
## [1] "Accuracy: 0.965454545454545"
## Drop the potassium variable (K, column 3) and refit the KNN model
data.trainK <- data.train[,-3]
data.testK <- data.test[,-3]
tuneGrid <- expand.grid(k = 1:20)  # candidate values of k (1 to 20)
set.seed(123)
# Train KNN with cross-validation on the reduced predictor set
knn_model_K <- train(label ~ .,
                     data = data.trainK,
                     method = "knn",
                     tuneGrid = tuneGrid,
                     trControl = ctrl)
# Best k selected by resampling
best_k <- knn_model_K$bestTune
print(best_k)
## k
## 1 1
# Predict on the matching test set
pred_knn_K <- predict(knn_model_K, newdata = data.testK)
# Confusion matrix: compute once, then print (avoids running
# confusionMatrix() twice on the same inputs)
conf_matrix_K <- confusionMatrix(pred_knn_K, data.testK$label)
print(conf_matrix_K)
## Confusion Matrix and Statistics
##
## Reference
## Prediction apple banana blackgram chickpea coconut coffee cotton grapes jute
## apple 25 0 0 0 0 0 0 0 0
## banana 0 25 0 0 0 0 0 0 0
## blackgram 0 0 25 0 0 0 0 0 0
## chickpea 0 0 0 25 0 0 0 0 0
## coconut 0 0 0 0 25 0 0 0 0
## coffee 0 0 0 0 0 25 0 0 0
## cotton 0 0 0 0 0 0 24 0 0
## grapes 0 0 0 0 0 0 0 25 0
## jute 0 0 0 0 0 0 0 0 19
## kidneybeans 0 0 0 0 0 0 0 0 0
## lentil 0 0 0 0 0 0 0 0 0
## maize 0 0 0 0 0 0 1 0 0
## mango 0 0 0 0 0 0 0 0 0
## mothbeans 0 0 0 0 0 0 0 0 0
## mungbean 0 0 0 0 0 0 0 0 0
## muskmelon 0 0 0 0 0 0 0 0 0
## orange 0 0 0 0 0 0 0 0 0
## papaya 0 0 0 0 0 0 0 0 1
## pigeonpeas 0 0 0 0 0 0 0 0 0
## pomegranate 0 0 0 0 0 0 0 0 0
## rice 0 0 0 0 0 0 0 0 5
## watermelon 0 0 0 0 0 0 0 0 0
## Reference
## Prediction kidneybeans lentil maize mango mothbeans mungbean muskmelon
## apple 0 0 0 0 0 0 0
## banana 0 0 0 0 0 0 0
## blackgram 0 1 0 0 0 0 0
## chickpea 2 0 0 0 0 0 0
## coconut 0 0 0 0 0 0 0
## coffee 0 0 0 0 0 0 0
## cotton 0 0 1 0 0 0 0
## grapes 0 0 0 0 0 0 0
## jute 0 0 0 0 0 0 0
## kidneybeans 23 0 0 0 0 0 0
## lentil 0 24 0 0 2 0 0
## maize 0 0 24 0 0 0 0
## mango 0 0 0 25 0 0 0
## mothbeans 0 0 0 0 23 0 0
## mungbean 0 0 0 0 0 25 0
## muskmelon 0 0 0 0 0 0 25
## orange 0 0 0 0 0 0 0
## papaya 0 0 0 0 0 0 0
## pigeonpeas 0 0 0 0 0 0 0
## pomegranate 0 0 0 0 0 0 0
## rice 0 0 0 0 0 0 0
## watermelon 0 0 0 0 0 0 0
## Reference
## Prediction orange papaya pigeonpeas pomegranate rice watermelon
## apple 0 0 0 0 0 0
## banana 0 0 0 0 0 0
## blackgram 0 0 0 0 0 0
## chickpea 0 0 0 0 0 0
## coconut 0 0 0 0 0 0
## coffee 0 0 0 0 0 0
## cotton 0 0 0 0 0 0
## grapes 0 0 0 0 0 0
## jute 0 0 0 0 1 0
## kidneybeans 0 0 0 0 0 0
## lentil 0 0 0 0 0 0
## maize 0 0 0 0 0 0
## mango 0 0 0 0 0 0
## mothbeans 0 0 0 0 0 0
## mungbean 0 0 0 0 0 0
## muskmelon 0 0 0 0 0 0
## orange 11 0 0 4 0 0
## papaya 0 25 0 0 0 0
## pigeonpeas 0 0 25 0 0 0
## pomegranate 14 0 0 21 0 0
## rice 0 0 0 0 24 0
## watermelon 0 0 0 0 0 25
##
## Overall Statistics
##
## Accuracy : 0.9418
## 95% CI : (0.9189, 0.9599)
## No Information Rate : 0.0455
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.939
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: apple Class: banana Class: blackgram
## Sensitivity 1.00000 1.00000 1.00000
## Specificity 1.00000 1.00000 0.99810
## Pos Pred Value 1.00000 1.00000 0.96154
## Neg Pred Value 1.00000 1.00000 1.00000
## Prevalence 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.04545 0.04545
## Detection Prevalence 0.04545 0.04545 0.04727
## Balanced Accuracy 1.00000 1.00000 0.99905
## Class: chickpea Class: coconut Class: coffee Class: cotton
## Sensitivity 1.00000 1.00000 1.00000 0.96000
## Specificity 0.99619 1.00000 1.00000 0.99810
## Pos Pred Value 0.92593 1.00000 1.00000 0.96000
## Neg Pred Value 1.00000 1.00000 1.00000 0.99810
## Prevalence 0.04545 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.04545 0.04545 0.04364
## Detection Prevalence 0.04909 0.04545 0.04545 0.04545
## Balanced Accuracy 0.99810 1.00000 1.00000 0.97905
## Class: grapes Class: jute Class: kidneybeans Class: lentil
## Sensitivity 1.00000 0.76000 0.92000 0.96000
## Specificity 1.00000 0.99810 1.00000 0.99619
## Pos Pred Value 1.00000 0.95000 1.00000 0.92308
## Neg Pred Value 1.00000 0.98868 0.99620 0.99809
## Prevalence 0.04545 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.03455 0.04182 0.04364
## Detection Prevalence 0.04545 0.03636 0.04182 0.04727
## Balanced Accuracy 1.00000 0.87905 0.96000 0.97810
## Class: maize Class: mango Class: mothbeans Class: mungbean
## Sensitivity 0.96000 1.00000 0.92000 1.00000
## Specificity 0.99810 1.00000 1.00000 1.00000
## Pos Pred Value 0.96000 1.00000 1.00000 1.00000
## Neg Pred Value 0.99810 1.00000 0.99620 1.00000
## Prevalence 0.04545 0.04545 0.04545 0.04545
## Detection Rate 0.04364 0.04545 0.04182 0.04545
## Detection Prevalence 0.04545 0.04545 0.04182 0.04545
## Balanced Accuracy 0.97905 1.00000 0.96000 1.00000
## Class: muskmelon Class: orange Class: papaya
## Sensitivity 1.00000 0.44000 1.00000
## Specificity 1.00000 0.99238 0.99810
## Pos Pred Value 1.00000 0.73333 0.96154
## Neg Pred Value 1.00000 0.97383 1.00000
## Prevalence 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.02000 0.04545
## Detection Prevalence 0.04545 0.02727 0.04727
## Balanced Accuracy 1.00000 0.71619 0.99905
## Class: pigeonpeas Class: pomegranate Class: rice
## Sensitivity 1.00000 0.84000 0.96000
## Specificity 1.00000 0.97333 0.99048
## Pos Pred Value 1.00000 0.60000 0.82759
## Neg Pred Value 1.00000 0.99223 0.99808
## Prevalence 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.03818 0.04364
## Detection Prevalence 0.04545 0.06364 0.05273
## Balanced Accuracy 1.00000 0.90667 0.97524
## Class: watermelon
## Sensitivity 1.00000
## Specificity 1.00000
## Pos Pred Value 1.00000
## Neg Pred Value 1.00000
## Prevalence 0.04545
## Detection Rate 0.04545
## Detection Prevalence 0.04545
## Balanced Accuracy 1.00000
# Overall accuracy of the KNN model fitted without the K variable
accuracy_K <- conf_matrix_K$overall["Accuracy"]
print(paste("Accuracy: ", accuracy_K)) ## accuracy = 0.94
## [1] "Accuracy: 0.941818181818182"
On observe ici que l’accuracy du modèle n’augmente pas lorsqu’on supprime les variables. Au contraire, elle diminue, en particulier lorsqu’on supprime la variable potassium. Il est donc préférable de garder les deux variables dans le modèle.
On va procéder à une sélection de variables afin de déterminer si toutes les variables sont intéressantes à garder pour la construction du modèle knn. Pour ce faire, nous allons utiliser la méthode RFE (Recursive Feature Elimination). La méthode RFE commence par entraîner le modèle knn sur l’ensemble complet des variables explicatives. Une fois le modèle entraîné, l’importance de chaque variable est évaluée à l’aide d’une validation croisée. Pour ce faire, RFE regarde la variation de la performance du modèle lorsque chaque variable est retirée. Si la suppression d’une caractéristique entraîne une baisse significative de la performance, cela indique que cette caractéristique est importante. Inversement, si la performance reste relativement stable après la suppression d’une caractéristique, cela suggère que celle-ci a peu d’importance. Le processus d’entraînement, d’évaluation et d’élimination est répété jusqu’à ce que l’élimination n’améliore plus la performance (Kuhn, 2019).
set.seed(123)
# Recursive feature elimination: rank predictors by the drop in
# 10-fold cross-validated accuracy when each one is removed
rfe_ctrl <- rfeControl(functions = caretFuncs,
                       method = "cv",
                       number = 10)
# All columns except the last one ('label'), i.e. the predictors
predictors_only <- data.train[, -ncol(data.train)]
rfe_results <- rfe(predictors_only,
                   data.train$label,
                   sizes = 1:7,  # evaluate every subset size
                   rfeControl = rfe_ctrl,
                   method = "knn")
# Summary of the resampling performance per subset size
print(rfe_results)
##
## Recursive feature selection
##
## Outer resampling method: Cross-Validated (10 fold)
##
## Resampling performance over subset size:
##
## Variables Accuracy Kappa AccuracySD KappaSD Selected
## 1 0.2294 0.1926 0.026056 0.027248
## 2 0.5098 0.4864 0.045197 0.047310
## 3 0.6947 0.6801 0.025804 0.027004
## 4 0.9045 0.8999 0.022072 0.023128
## 5 0.9165 0.9125 0.017878 0.018738
## 6 0.9807 0.9798 0.008802 0.009222 *
## 7 0.9801 0.9792 0.007928 0.008306
##
## The top 5 variables (out of 6):
## humidity, K, P, temperature, N
# Visualise cross-validated accuracy as a function of subset size
plot(rfe_results, type = c("g", "o"))
# Predictor subset retained by RFE
selected_vars <- predictors(rfe_results)
print(selected_vars)
## [1] "humidity" "K" "P" "temperature" "N"
## [6] "rainfall"
Ici, les variables les plus importantes sont : l’humidité, le taux de potassium, le taux de phosphore, le taux d’azote et la température. Toutefois, ajouter la variable taux de précipitation semble intéressant pour avoir une accuracy maximale égale à 0,9807. Cette accuracy est très proche de l’accuracy du modèle prenant en compte toutes les variables.
# Simplified model: drop the pH variable (column 6), the only
# predictor RFE did not retain in its selected subset
data.train_S <- data.train[,-6]
data.test_S <- data.test[,-6]
tuneGrid <- expand.grid(k = 1:20)  # candidate values of k (1 to 20)
set.seed(123)
# Train KNN with cross-validation on the 6 remaining predictors
knn_model_S <- train(label ~ .,
                     data = data.train_S,
                     method = "knn",
                     tuneGrid = tuneGrid,
                     trControl = ctrl)
# Best k selected by resampling
best_k <- knn_model_S$bestTune
print(best_k)
## k
## 1 1
# Predict with the tuned model on the matching test set
pred_knn_S <- predict(knn_model_S, newdata = data.test_S)
# Confusion matrix: compute once, then print (avoids running
# confusionMatrix() twice on the same inputs)
conf_matrix <- confusionMatrix(pred_knn_S, data.test_S$label)
print(conf_matrix)
## Confusion Matrix and Statistics
##
## Reference
## Prediction apple banana blackgram chickpea coconut coffee cotton grapes jute
## apple 25 0 0 0 0 0 0 0 0
## banana 0 25 0 0 0 0 0 0 0
## blackgram 0 0 25 0 0 0 0 0 0
## chickpea 0 0 0 25 0 0 0 0 0
## coconut 0 0 0 0 25 0 0 0 0
## coffee 0 0 0 0 0 25 0 0 0
## cotton 0 0 0 0 0 0 24 0 0
## grapes 0 0 0 0 0 0 0 25 0
## jute 0 0 0 0 0 0 0 0 19
## kidneybeans 0 0 0 0 0 0 0 0 0
## lentil 0 0 0 0 0 0 0 0 0
## maize 0 0 0 0 0 0 1 0 0
## mango 0 0 0 0 0 0 0 0 0
## mothbeans 0 0 0 0 0 0 0 0 0
## mungbean 0 0 0 0 0 0 0 0 0
## muskmelon 0 0 0 0 0 0 0 0 0
## orange 0 0 0 0 0 0 0 0 0
## papaya 0 0 0 0 0 0 0 0 1
## pigeonpeas 0 0 0 0 0 0 0 0 0
## pomegranate 0 0 0 0 0 0 0 0 0
## rice 0 0 0 0 0 0 0 0 5
## watermelon 0 0 0 0 0 0 0 0 0
## Reference
## Prediction kidneybeans lentil maize mango mothbeans mungbean muskmelon
## apple 0 0 0 0 0 0 0
## banana 0 0 0 0 0 0 0
## blackgram 0 0 0 0 0 0 0
## chickpea 0 0 0 0 0 0 0
## coconut 0 0 0 0 0 0 0
## coffee 0 0 0 0 0 0 0
## cotton 0 0 0 0 0 0 0
## grapes 0 0 0 0 0 0 0
## jute 0 0 0 0 0 0 0
## kidneybeans 25 0 0 0 0 0 0
## lentil 0 25 0 0 2 0 0
## maize 0 0 25 0 0 0 0
## mango 0 0 0 25 0 0 0
## mothbeans 0 0 0 0 23 0 0
## mungbean 0 0 0 0 0 25 0
## muskmelon 0 0 0 0 0 0 25
## orange 0 0 0 0 0 0 0
## papaya 0 0 0 0 0 0 0
## pigeonpeas 0 0 0 0 0 0 0
## pomegranate 0 0 0 0 0 0 0
## rice 0 0 0 0 0 0 0
## watermelon 0 0 0 0 0 0 0
## Reference
## Prediction orange papaya pigeonpeas pomegranate rice watermelon
## apple 0 0 0 0 0 0
## banana 0 0 0 0 0 0
## blackgram 0 0 0 0 0 0
## chickpea 0 0 0 0 0 0
## coconut 0 0 0 0 0 0
## coffee 0 0 0 0 0 0
## cotton 0 0 0 0 0 0
## grapes 0 0 0 0 0 0
## jute 0 0 0 0 1 0
## kidneybeans 0 0 0 0 0 0
## lentil 0 0 0 0 0 0
## maize 0 0 0 0 0 0
## mango 0 0 0 0 0 0
## mothbeans 0 0 0 0 0 0
## mungbean 0 0 0 0 0 0
## muskmelon 0 0 0 0 0 0
## orange 25 0 0 0 0 0
## papaya 0 24 0 0 0 0
## pigeonpeas 0 0 25 0 0 0
## pomegranate 0 0 0 25 0 0
## rice 0 1 0 0 24 0
## watermelon 0 0 0 0 0 25
##
## Overall Statistics
##
## Accuracy : 0.98
## 95% CI : (0.9645, 0.99)
## No Information Rate : 0.0455
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.979
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: apple Class: banana Class: blackgram
## Sensitivity 1.00000 1.00000 1.00000
## Specificity 1.00000 1.00000 1.00000
## Pos Pred Value 1.00000 1.00000 1.00000
## Neg Pred Value 1.00000 1.00000 1.00000
## Prevalence 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.04545 0.04545
## Detection Prevalence 0.04545 0.04545 0.04545
## Balanced Accuracy 1.00000 1.00000 1.00000
## Class: chickpea Class: coconut Class: coffee Class: cotton
## Sensitivity 1.00000 1.00000 1.00000 0.96000
## Specificity 1.00000 1.00000 1.00000 1.00000
## Pos Pred Value 1.00000 1.00000 1.00000 1.00000
## Neg Pred Value 1.00000 1.00000 1.00000 0.99810
## Prevalence 0.04545 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.04545 0.04545 0.04364
## Detection Prevalence 0.04545 0.04545 0.04545 0.04364
## Balanced Accuracy 1.00000 1.00000 1.00000 0.98000
## Class: grapes Class: jute Class: kidneybeans Class: lentil
## Sensitivity 1.00000 0.76000 1.00000 1.00000
## Specificity 1.00000 0.99810 1.00000 0.99619
## Pos Pred Value 1.00000 0.95000 1.00000 0.92593
## Neg Pred Value 1.00000 0.98868 1.00000 1.00000
## Prevalence 0.04545 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.03455 0.04545 0.04545
## Detection Prevalence 0.04545 0.03636 0.04545 0.04909
## Balanced Accuracy 1.00000 0.87905 1.00000 0.99810
## Class: maize Class: mango Class: mothbeans Class: mungbean
## Sensitivity 1.00000 1.00000 0.92000 1.00000
## Specificity 0.99810 1.00000 1.00000 1.00000
## Pos Pred Value 0.96154 1.00000 1.00000 1.00000
## Neg Pred Value 1.00000 1.00000 0.99620 1.00000
## Prevalence 0.04545 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.04545 0.04182 0.04545
## Detection Prevalence 0.04727 0.04545 0.04182 0.04545
## Balanced Accuracy 0.99905 1.00000 0.96000 1.00000
## Class: muskmelon Class: orange Class: papaya
## Sensitivity 1.00000 1.00000 0.96000
## Specificity 1.00000 1.00000 0.99810
## Pos Pred Value 1.00000 1.00000 0.96000
## Neg Pred Value 1.00000 1.00000 0.99810
## Prevalence 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.04545 0.04364
## Detection Prevalence 0.04545 0.04545 0.04545
## Balanced Accuracy 1.00000 1.00000 0.97905
## Class: pigeonpeas Class: pomegranate Class: rice
## Sensitivity 1.00000 1.00000 0.96000
## Specificity 1.00000 1.00000 0.98857
## Pos Pred Value 1.00000 1.00000 0.80000
## Neg Pred Value 1.00000 1.00000 0.99808
## Prevalence 0.04545 0.04545 0.04545
## Detection Rate 0.04545 0.04545 0.04364
## Detection Prevalence 0.04545 0.04545 0.05455
## Balanced Accuracy 1.00000 1.00000 0.97429
## Class: watermelon
## Sensitivity 1.00000
## Specificity 1.00000
## Pos Pred Value 1.00000
## Neg Pred Value 1.00000
## Prevalence 0.04545
## Detection Rate 0.04545
## Detection Prevalence 0.04545
## Balanced Accuracy 1.00000
# Overall accuracy of the simplified (6-predictor) KNN model
accuracy <- conf_matrix$overall["Accuracy"]
print(paste("Accuracy: ", accuracy))
## [1] "Accuracy: 0.98"
Étant donné le grand nombre de cultures, il est probable que certaines cultures soient très similaires en termes de caractéristiques mesurées. Par exemple, deux cultures qui se développent dans des conditions de sol et climatiques très proches pourraient partager des profils de sol quasi identiques. Cela rendrait difficile la distinction entre ces cultures pour le modèle.
# PCA on the full data set; column 8 (crop label) is a supplementary
# qualitative variable, so it is displayed but does not drive the axes
require(FactoMineR)
resPCA2 <- PCA(dta, scale.unit = TRUE, quali.sup = 8)
plot(resPCA2,habillage=8,label="quali")
Une classification ascendante hiérarchique est utilisée pour déterminer quelles cultures sont similaires. L’objectif est par la suite de choisir un sous-ensemble de cultures similaires pour évaluer la capacité des modèles à différencier les cultures similaires.
library(FactoMineR)
# PCA on the quantitative variables only (label, column 8, excluded),
# then hierarchical clustering (HCPC) on the principal components
res_PCA <- PCA(dta[,-8], scale.unit = TRUE)
res_HCPC <- HCPC(res_PCA, nb.clust = 3) ## 3 clusters requested
dta2 <- dta
dta2$clust <- res_HCPC$data.clust$clust
# Cross-tabulate crop label against cluster membership
table_clusters_label <- table(dta2$label, dta2$clust)
print(table_clusters_label)
##
## 1 2 3
## apple 0 0 100
## banana 100 0 0
## blackgram 0 100 0
## chickpea 0 100 0
## coconut 100 0 0
## coffee 100 0 0
## cotton 100 0 0
## grapes 0 0 100
## jute 100 0 0
## kidneybeans 0 100 0
## lentil 0 100 0
## maize 79 21 0
## mango 0 100 0
## mothbeans 0 100 0
## mungbean 5 95 0
## muskmelon 100 0 0
## orange 99 1 0
## papaya 99 1 0
## pigeonpeas 5 95 0
## pomegranate 92 8 0
## rice 100 0 0
## watermelon 100 0 0
## Cluster 1 = Banana, Coconut, Coffee, Cotton, Jute, Maize, Muskmelon, Orange, Papaya, Pomegranate, Rice, Watermelon
## Cluster 2 = Blackgram, Chickpea, Kidneybeans, Lentil, Mango, Mothbeans, Mungbean, Pigeonpeas
## Cluster 3 = Apple, Grapes
# Quantitative variables characterising cluster 1, ranked by v-test
res_HCPC$desc.var$quanti$`1`
## v.test Mean in category Overall mean sd in category Overall sd
## N 29.896741 72.44953 50.55182 36.1291359 36.9089426
## humidity 26.205040 83.05699 71.48178 11.0149721 22.2587511
## rainfall 16.925562 121.91898 103.46366 62.5047087 54.9458966
## ph 2.459776 6.50725 6.46948 0.5558402 0.7737618
## K -12.098751 35.99152 48.14909 13.6615430 50.6364183
## P -24.766596 37.15437 53.36273 21.7610959 32.9783851
## p.value
## N 2.169424e-196
## humidity 2.328226e-151
## rainfall 2.915491e-64
## ph 1.390239e-02
## K 1.072301e-33
## P 2.054326e-135
# Quantitative variables characterising cluster 2, ranked by v-test
res_HCPC$desc.var$quanti$`2`
## v.test Mean in category Overall mean sd in category Overall sd
## P 4.152397 57.147381 53.36273 16.370228 32.9783851
## ph 3.064621 6.535016 6.46948 1.037016 0.7737618
## temperature 2.197248 25.923677 25.61624 5.216591 5.0625976
## K -13.957348 28.616322 48.14909 19.726144 50.6364183
## rainfall -15.474548 79.964558 103.46366 35.794826 54.9458966
## N -24.006691 26.063337 50.55182 15.901061 36.9089426
## humidity -33.207386 51.053423 71.48178 21.843505 22.2587511
## p.value
## P 3.290111e-05
## ph 2.179460e-03
## temperature 2.800272e-02
## K 2.837995e-44
## rainfall 5.153361e-54
## N 2.367540e-127
## humidity 8.422269e-242
# Quantitative variables characterising cluster 3, ranked by v-test
res_HCPC$desc.var$quanti$`3`
## v.test Mean in category Overall mean sd in category Overall sd
## K 44.469990 200.00000 48.14909 3.2787193 50.6364183
## P 35.978234 133.37500 53.36273 7.8895104 32.9783851
## humidity 10.407899 87.10431 71.48178 5.3927887 22.2587511
## rainfall -3.327766 91.13330 103.46366 22.1914558 54.9458966
## temperature -6.959578 23.24026 25.61624 6.9033742 5.0625976
## ph -9.422977 5.97780 6.46948 0.2866251 0.7737618
## N -11.475374 21.99000 50.55182 12.1663429 36.9089426
## p.value
## K 0.000000e+00
## P 1.832097e-283
## humidity 2.282018e-25
## rainfall 8.754532e-04
## temperature 3.412932e-12
## ph 4.384775e-21
## N 1.754169e-30
## Plot: clusters with the per-crop centres of gravity overlaid
require(factoextra)
require(ggrepel)
# Mean factorial coordinates of each crop = its centre of gravity
centres_cultures <- aggregate(res_PCA$ind$coord, by = list(culture = dta$label), FUN = mean)
fviz_cluster(res_HCPC,
             geom = "point",
             ellipse.type = "convex",
             palette = c("#B3CDE3", "#CCEBC5", "#FFD580"), # lighter colours
             ggtheme = theme_minimal(),
             main = "Clusters avec centres de gravité des cultures") +
  # Overlay each crop's centre of gravity as a small solid black point
  geom_point(data = centres_cultures, aes(x = Dim.1, y = Dim.2),
             color = "black", size = 2, shape = 16) +
  # Crop names, repelled so the labels do not overlap
  geom_text_repel(data = centres_cultures, aes(x = Dim.1, y = Dim.2, label = culture),
                  color = "black",
                  size = 4,  # label size
                  max.overlaps = 10)
Nous obtenons ici trois clusters différents. Le premier cluster inclut les fruits tropicaux et les cultures de rente, tels que la banane, la noix de coco, et le café, qui prospèrent dans des conditions de sol humide, riche en azote, avec des précipitations importantes. Ces cultures sont adaptées à des climats tropicaux où l’abondance d’eau et un sol bien drainé sont essentiels pour une croissance optimale. Le deuxième cluster regroupe la mangue et les légumineuses comme les pois chiches, les haricots mungo. Ces plantes préfèrent un sol relativement pauvre en azote, bien drainé, et faiblement humide. Le dernier cluster comprend des cultures tempérées comme la pomme et le raisin, qui préfèrent un sol riche en potassium et en phosphore, mais pauvre en azote.
Nous avons sélectionné 5 cultures très similaires pour évaluer la capacité de prédiction de nos modèles dans le cas de situations où les cultures présentent des caractéristiques agronomiques et environnementales proches. L’objectif est de vérifier si les modèles peuvent encore discriminer correctement dans des contextes où les variations entre les cultures sont faibles, simulant ainsi des choix agricoles plus délicats à effectuer. Pour ce faire, nous avons choisi les cultures suivantes : le jute, le riz, le maïs, le coton et le café.
# Full model: evaluate the KNN classifier on the subset of
# agronomically similar crops only.
set.seed(123)  # reproducibility (predict() on KNN is deterministic here)
cultures_similaires <- c("coffee", "cotton", "maize", "rice", "jute")
# Keep only the test rows belonging to the five similar crops
filtered_test_data <- data.test[data.test$label %in% cultures_similaires, ]
filtered_test_data <- droplevels(filtered_test_data)  # drop unused factor levels
# Prediction on the crop subset
pred_knn_filtered <- predict(knn_model, newdata = filtered_test_data)
# Confusion matrix and accuracy
# Re-level the predictions so confusionMatrix() compares factors
# with identical level sets
pred_knn_filtered <- factor(pred_knn_filtered, levels = levels(filtered_test_data$label))
conf_matrix_filtered <- confusionMatrix(pred_knn_filtered, filtered_test_data$label)
print(conf_matrix_filtered)
## Confusion Matrix and Statistics
##
## Reference
## Prediction coffee cotton jute maize rice
## coffee 25 0 0 0 0
## cotton 0 24 0 0 0
## jute 0 0 19 0 1
## maize 0 1 0 25 0
## rice 0 0 5 0 24
##
## Overall Statistics
##
## Accuracy : 0.9435
## 95% CI : (0.8871, 0.977)
## No Information Rate : 0.2016
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9294
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: coffee Class: cotton Class: jute Class: maize
## Sensitivity 1.0000 0.9600 0.7917 1.0000
## Specificity 1.0000 1.0000 0.9900 0.9899
## Pos Pred Value 1.0000 1.0000 0.9500 0.9615
## Neg Pred Value 1.0000 0.9900 0.9519 1.0000
## Prevalence 0.2016 0.2016 0.1935 0.2016
## Detection Rate 0.2016 0.1935 0.1532 0.2016
## Detection Prevalence 0.2016 0.1935 0.1613 0.2097
## Balanced Accuracy 1.0000 0.9800 0.8908 0.9949
## Class: rice
## Sensitivity 0.9600
## Specificity 0.9495
## Pos Pred Value 0.8276
## Neg Pred Value 0.9895
## Prevalence 0.2016
## Detection Rate 0.1935
## Detection Prevalence 0.2339
## Balanced Accuracy 0.9547
# Overall accuracy of the full KNN model on the similar-crop subset
accuracy_filtered <- conf_matrix_filtered$overall['Accuracy']
# Simplified model: same evaluation on the five similar crops, using
# the reduced-predictor test set and the simplified KNN model.
filtered_test_data_S <- data.test_S[data.test_S$label %in% cultures_similaires, ]
filtered_test_data_S <- droplevels(filtered_test_data_S)  # drop unused factor levels
# Prediction on the crop subset
pred_knn_filtered_S <- predict(knn_model_S, newdata = filtered_test_data_S)
# Confusion matrix and accuracy
pred_knn_filtered_S <- factor(pred_knn_filtered_S, levels = levels(filtered_test_data_S$label))
conf_matrix_filtered_S <- confusionMatrix(pred_knn_filtered_S, filtered_test_data_S$label)
print(conf_matrix_filtered_S)
## Confusion Matrix and Statistics
##
## Reference
## Prediction coffee cotton jute maize rice
## coffee 25 0 0 0 0
## cotton 0 24 0 0 0
## jute 0 0 19 0 1
## maize 0 1 0 25 0
## rice 0 0 5 0 24
##
## Overall Statistics
##
## Accuracy : 0.9435
## 95% CI : (0.8871, 0.977)
## No Information Rate : 0.2016
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9294
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: coffee Class: cotton Class: jute Class: maize
## Sensitivity 1.0000 0.9600 0.7917 1.0000
## Specificity 1.0000 1.0000 0.9900 0.9899
## Pos Pred Value 1.0000 1.0000 0.9500 0.9615
## Neg Pred Value 1.0000 0.9900 0.9519 1.0000
## Prevalence 0.2016 0.2016 0.1935 0.2016
## Detection Rate 0.2016 0.1935 0.1532 0.2016
## Detection Prevalence 0.2016 0.1935 0.1613 0.2097
## Balanced Accuracy 1.0000 0.9800 0.8908 0.9949
## Class: rice
## Sensitivity 0.9600
## Specificity 0.9495
## Pos Pred Value 0.8276
## Neg Pred Value 0.9895
## Prevalence 0.2016
## Detection Rate 0.1935
## Detection Prevalence 0.2339
## Balanced Accuracy 0.9547
# Overall accuracy of the simplified KNN model on the similar-crop subset
accuracy_filtered_S <- conf_matrix_filtered_S$overall['Accuracy']
Le modèle simplifié obtient ici des performances équivalentes à celles du modèle complet pour distinguer les cultures similaires, avec une accuracy de 0,94 dans les deux cas.
Plusieurs métriques permettent de mieux comprendre la performance du modèle. Nous allons nous intéresser à la sensibilité et à la spécificité. La sensibilité indique la capacité du modèle à identifier correctement les échantillons appartenant à une classe spécifique. Elle prend la valeur la plus basse pour la classe jute avec une valeur de 79%, indiquant que seulement 79 % des échantillons de jute ont été bien classés. La spécificité mesure la capacité du modèle à identifier correctement les échantillons qui n’appartiennent pas à une classe donnée. Elle est la plus faible pour la classe riz (0,94). On observe ici que le modèle complet confond les cultures riz et jute.
Les performances du modèle simplifié sont quasiment identiques à celles du modèle complet. Le modèle simplifié possède un léger avantage sur la classe maïs en capturant 100 % des échantillons correctement. De plus, il est légèrement meilleur en termes de spécificité, avec une amélioration pour la classe coton. Toutefois, le modèle simplifié confond de la même manière les classes jute et riz.
# Logistic regression model: same similar-crop evaluation as for KNN.
cultures_similaires <- c("coffee", "cotton", "maize", "rice", "jute")
filtered_test_data <- data.test[data.test$label %in% cultures_similaires, ]
filtered_test_data <- droplevels(filtered_test_data)  # drop unused factor levels
# Prediction on the crop subset
pred_reg_filtered <- predict(reg_model, newdata = filtered_test_data)
# Confusion matrix and accuracy
# NOTE(review): this reuses the names filtered_test_data and
# conf_matrix_filtered, overwriting the earlier KNN results.
pred_reg_filtered <- factor(pred_reg_filtered, levels = levels(filtered_test_data$label))
conf_matrix_filtered <- confusionMatrix(pred_reg_filtered, filtered_test_data$label)
print(conf_matrix_filtered)
## Confusion Matrix and Statistics
##
## Reference
## Prediction coffee cotton jute maize rice
## coffee 24 0 1 0 0
## cotton 0 23 0 3 0
## jute 0 0 12 0 2
## maize 0 0 6 18 0
## rice 0 0 2 0 21
##
## Overall Statistics
##
## Accuracy : 0.875
## 95% CI : (0.7992, 0.9299)
## No Information Rate : 0.2143
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.8435
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: coffee Class: cotton Class: jute Class: maize
## Sensitivity 1.0000 1.0000 0.5714 0.8571
## Specificity 0.9886 0.9663 0.9780 0.9341
## Pos Pred Value 0.9600 0.8846 0.8571 0.7500
## Neg Pred Value 1.0000 1.0000 0.9082 0.9659
## Prevalence 0.2143 0.2054 0.1875 0.1875
## Detection Rate 0.2143 0.2054 0.1071 0.1607
## Detection Prevalence 0.2232 0.2321 0.1250 0.2143
## Balanced Accuracy 0.9943 0.9831 0.7747 0.8956
## Class: rice
## Sensitivity 0.9130
## Specificity 0.9775
## Pos Pred Value 0.9130
## Neg Pred Value 0.9775
## Prevalence 0.2054
## Detection Rate 0.1875
## Detection Prevalence 0.2054
## Balanced Accuracy 0.9453
# Overall accuracy of the full regression model (overwrites the KNN value)
accuracy_filtered <- conf_matrix_filtered$overall['Accuracy']
## Simplified model (selected predictors)
# NOTE(review): unlike the KNN case, this predicts on filtered_test_data
# (built from data.test), not a dedicated *_S test set — presumably
# reg_model_final uses a subset of the same columns; confirm this is intended.
pred_reg_filtered <- predict(reg_model_final, newdata = filtered_test_data)
# Confusion matrix and accuracy
pred_reg_filtered <- factor(pred_reg_filtered, levels = levels(filtered_test_data$label))
conf_matrix_filtered <- confusionMatrix(pred_reg_filtered, filtered_test_data$label)
print(conf_matrix_filtered)
## Confusion Matrix and Statistics
##
## Reference
## Prediction coffee cotton jute maize rice
## coffee 25 0 0 0 0
## cotton 0 24 0 1 0
## jute 0 0 19 0 2
## maize 0 0 0 16 0
## rice 0 0 2 0 23
##
## Overall Statistics
##
## Accuracy : 0.9554
## 95% CI : (0.8989, 0.9853)
## No Information Rate : 0.2232
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9439
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: coffee Class: cotton Class: jute Class: maize
## Sensitivity 1.0000 1.0000 0.9048 0.9412
## Specificity 1.0000 0.9886 0.9780 1.0000
## Pos Pred Value 1.0000 0.9600 0.9048 1.0000
## Neg Pred Value 1.0000 1.0000 0.9780 0.9896
## Prevalence 0.2232 0.2143 0.1875 0.1518
## Detection Rate 0.2232 0.2143 0.1696 0.1429
## Detection Prevalence 0.2232 0.2232 0.1875 0.1429
## Balanced Accuracy 1.0000 0.9943 0.9414 0.9706
## Class: rice
## Sensitivity 0.9200
## Specificity 0.9770
## Pos Pred Value 0.9200
## Neg Pred Value 0.9770
## Prevalence 0.2232
## Detection Rate 0.2054
## Detection Prevalence 0.2232
## Balanced Accuracy 0.9485
# Overall accuracy of the simplified regression model
accuracy_filtered <- conf_matrix_filtered$overall['Accuracy']
accuracy_filtered
## Accuracy
## 0.9553571
## Graph: accuracy comparison across all fitted models
library(ggplot2)
# Overall accuracies collected from the models above (hard-coded summary)
model_data <- data.frame(
  Model = c("Modèle régression complet", "Modèle régression selectionné", "Modèle knn complet", "Modèle knn sélectionné", "Modèle random forest complet", "Modèle random forest sélectionné"),
  Accuracy = c(0.85, 0.94, 0.98, 0.98, 0.99, 0.99))
ggplot(model_data, aes(x = Model, y = Accuracy, fill = Model)) +
  # geom_col() is the idiomatic shortcut for geom_bar(stat = "identity")
  geom_col(width = 0.5) +
  scale_fill_manual(values = c("#0057D9", "#B3CDE3", "#3DAA37", "#A8D5BA", "#FFA500", "#FFDAB9")) +  # custom colours
  geom_text(aes(label = Accuracy), vjust = -0.5, size = 4) +  # value labels above the bars
  ylim(0, 1) +  # y axis as a proportion in [0, 1]
  labs(title = "Comparaison de l'Accuracy des différents modèles",
       x = "Modèle",
       y = "Accuracy") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 5))  # tilted x labels